forked from OSchip/llvm-project
60249 lines
2.4 MiB
60249 lines
2.4 MiB
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
|
|
|
|
; Byte-wise equality compare of two 128-bit arguments (viewed as <16 x i8>),
; with the 16 result bits widened to a 32-bit mask and returned as i32.
; With the VLX prefix the checks expect a direct compare-into-mask sequence;
; with the NoVLX prefix they pin a longer sequence that moves each mask bit
; through a general-purpose register (kshiftlw/kshiftrw/kmovw) and rebuilds
; the vector with vpinsrb.
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi0:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi3:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi4:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi5:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi6:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi7:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
; Shuffle indices 0-15 pick the compare result; indices 16-31 pick lanes of
; the zeroinitializer operand, so the upper 16 bits of the mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Memory-operand variant: the second compare operand is loaded through the
; %__b pointer instead of being passed by value. The byte-wise equality
; result (16 bits) is widened to a 32-bit mask and returned as i32.
; The VLX checks expect the load to be folded into vpcmpeqb; the NoVLX checks
; pin the longer per-bit extract/vpinsrb rebuild sequence.
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi8:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi9:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi10:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi11:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi12:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi13:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi14:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi15:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
; Second operand comes from memory rather than a register argument.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
; Shuffle indices 0-15 pick the compare result; indices 16-31 pick lanes of
; the zeroinitializer operand, so the upper 16 bits of the mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked variant: the byte-wise equality result of %__a and %__b is ANDed with
; the i16 argument %__u (reinterpreted as <16 x i1>) before being widened to a
; 32-bit mask and returned as i32.
; The VLX checks expect the mask to be applied as a {%k1} write-mask on
; vpcmpeqb; the NoVLX checks apply it on vptestmd and then pin the longer
; per-bit extract/vpinsrb rebuild sequence.
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi16:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi17:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi18:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi19:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi20:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi21:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi22:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi23:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
; Reinterpret the i16 argument as one mask bit per lane and AND it with the
; compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 0-15 pick the masked result; indices 16-31 pick lanes of
; the zeroinitializer operand, so the upper 16 bits of the mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked, memory-operand variant: the second compare operand is loaded through
; %__b, and the byte-wise equality result is ANDed with the i16 argument %__u
; (reinterpreted as <16 x i1>) before being widened to a 32-bit mask and
; returned as i32.
; The VLX checks expect a single masked vpcmpeqb with a folded load; the NoVLX
; checks pin the longer per-bit extract/vpinsrb rebuild sequence.
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi24:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi25:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi26:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi27:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi28:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi29:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi30:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi31:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
; Second operand comes from memory rather than a register argument.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
; Reinterpret the i16 argument as one mask bit per lane and AND it with the
; compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 0-15 pick the masked result; indices 16-31 pick lanes of
; the zeroinitializer operand, so the upper 16 bits of the mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; 64-bit-result variant: byte-wise equality compare of two 128-bit arguments
; (viewed as <16 x i8>), with the 16 result bits widened to a 64-bit mask and
; returned as i64.
; The VLX checks expect a direct compare-into-mask followed by kmovq; the
; NoVLX checks pin the per-bit extract/vpinsrb rebuild sequence and then
; assemble the i64 from two 32-bit stack loads (movl/shlq/orq).
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi32:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi33:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi34:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi35:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi36:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi37:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi38:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi39:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
; Shuffle indices 0-15 pick the compare result. Every remaining index lies in
; 16-31 (that range is repeated for lanes 32-47 and 48-63), which selects lanes
; of the zeroinitializer operand, so the upper 48 bits of the mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; 64-bit-result, memory-operand variant: the second compare operand is loaded
; through %__b. The byte-wise equality result (16 bits) is widened to a 64-bit
; mask and returned as i64.
; The VLX checks expect the load to be folded into vpcmpeqb followed by kmovq;
; the NoVLX checks pin the per-bit extract/vpinsrb rebuild sequence and then
; assemble the i64 from two 32-bit stack loads (movl/shlq/orq).
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi40:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi41:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi42:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi43:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi44:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi45:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi46:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi47:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
; Second operand comes from memory rather than a register argument.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
; Shuffle indices 0-15 pick the compare result. Every remaining index lies in
; 16-31 (that range is repeated for lanes 32-47 and 48-63), which selects lanes
; of the zeroinitializer operand, so the upper 48 bits of the mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked byte-equality compare of two 128-bit vectors, widened to a 64-bit mask.
; IR: %__a and %__b are bitcast to <16 x i8> and compared with icmp eq; the
; <16 x i1> result is ANDed with %__u (bitcast to <16 x i1>). The shufflevector
; keeps lanes 0-15 and fills lanes 16-63 from the zeroinitializer operand
; (indices 16-31, repeated three times), and the <64 x i1> is returned as i64.
; Expected code: with the AVX512BW+VL configuration a single masked vpcmpeqb
; into %k0 followed by kmovq; with the AVX512F-only configuration the mask bits
; are extracted one at a time (kshiftlw/kshiftrw/kmovw), rebuilt with vpinsrb,
; re-tested with vptestmd and combined through stack slots.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi48:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi49:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi50:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi51:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi52:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi53:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi54:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi55:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Memory-operand variant of the masked 16-byte equality compare widened to i64.
; IR: %__a is bitcast to <16 x i8> and compared (icmp eq) against the <2 x i64>
; loaded from %__b; the <16 x i1> result is ANDed with %__u (bitcast to
; <16 x i1>). The shufflevector keeps lanes 0-15 and fills lanes 16-63 from the
; zeroinitializer operand (indices 16-31, repeated), then the <64 x i1> is
; returned as i64.
; Expected code: with the AVX512BW+VL configuration the load is folded into a
; masked vpcmpeqb (%rsi) writing %k0; with the AVX512F-only configuration the
; load is folded into the xmm vpcmpeqb and the mask is then extracted bit by
; bit, rebuilt with vpinsrb and combined through stack slots.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi56:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi57:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi58:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi59:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi60:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi61:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi62:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi63:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp eq <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked byte-equality compare of two 256-bit vectors, widened to a 64-bit mask.
; IR: %__a and %__b are bitcast to <32 x i8> and compared with icmp eq; the
; shufflevector keeps lanes 0-31 and takes lanes 32-63 from the zeroinitializer
; operand, and the <64 x i1> is returned as i64.
; Expected code: with the AVX512BW+VL configuration a single ymm vpcmpeqb into
; %k0 followed by kmovq; with the AVX512F-only configuration the ymm compare
; result is split with vextracti128, each half is converted to a 16-bit mask
; (vpmovsxbd/vpslld/vptestmd) and stored to the stack, zero masks are stored
; for the upper part, and the halves are combined with shlq/orq.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi64:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi65:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi66:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%1 = bitcast <4 x i64> %__b to <32 x i8>
|
|
%2 = icmp eq <32 x i8> %0, %1
|
|
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Memory-operand variant of the unmasked 32-byte equality compare widened to i64.
; IR: %__a is bitcast to <32 x i8> and compared (icmp eq) against the <4 x i64>
; loaded from %__b; the shufflevector keeps lanes 0-31 and takes lanes 32-63
; from the zeroinitializer operand, and the <64 x i1> is returned as i64.
; Expected code: with the AVX512BW+VL configuration the load is folded into
; vpcmpeqb (%rdi) writing %k0; with the AVX512F-only configuration the load is
; folded into the ymm vpcmpeqb, the result is split with vextracti128, each
; half is converted to a 16-bit mask and stored to the stack, and the pieces
; are combined with shlq/orq.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi67:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi68:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi69:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <32 x i8>
|
|
%2 = icmp eq <32 x i8> %0, %1
|
|
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked byte-equality compare of two 256-bit vectors, widened to a 64-bit mask.
; IR: %__a and %__b are bitcast to <32 x i8> and compared with icmp eq; the
; <32 x i1> result is ANDed with %__u (i32 bitcast to <32 x i1>). The
; shufflevector keeps lanes 0-31 and takes lanes 32-63 from the zeroinitializer
; operand, and the <64 x i1> is returned as i64.
; Expected code: with the AVX512BW+VL configuration a single masked ymm
; vpcmpeqb into %k0 followed by kmovq; with the AVX512F-only configuration
; %edi is stored to the stack and reloaded as two 16-bit k-registers, each is
; expanded to a byte vector (vpternlogd/vpmovdb) and ANDed (vpand) with one
; half of the ymm compare result before the halves are converted back to
; masks and combined through stack slots.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi70:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi71:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi72:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
|
|
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%1 = bitcast <4 x i64> %__b to <32 x i8>
|
|
%2 = icmp eq <32 x i8> %0, %1
|
|
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Memory-operand variant of the masked 32-byte equality compare widened to i64.
; IR: %__a is bitcast to <32 x i8> and compared (icmp eq) against the <4 x i64>
; loaded from %__b; the <32 x i1> result is ANDed with %__u (i32 bitcast to
; <32 x i1>). The shufflevector keeps lanes 0-31 and takes lanes 32-63 from the
; zeroinitializer operand, and the <64 x i1> is returned as i64.
; Expected code: with the AVX512BW+VL configuration the load is folded into a
; masked vpcmpeqb (%rsi) writing %k0; with the AVX512F-only configuration %edi
; is stored to the stack and reloaded as two 16-bit k-registers, each expanded
; to a byte vector (vpternlogd/vpmovdb) and ANDed (vpand) with one half of the
; ymm compare result before conversion back to masks.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi73:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi74:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi75:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
|
|
; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
|
|
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <32 x i8>
|
|
%2 = icmp eq <32 x i8> %0, %1
|
|
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked word-equality compare of two 128-bit vectors, widened to a 16-bit mask.
; IR: %__a and %__b are bitcast to <8 x i16> and compared with icmp eq; the
; shufflevector keeps lanes 0-7 and takes lanes 8-15 from the zeroinitializer
; operand, and the <16 x i1> is returned as i16.
; Expected code: with the AVX512BW+VL configuration a single vpcmpeqw into %k0
; followed by kmovd; with the AVX512F-only configuration the xmm compare result
; is sign-extended to qwords (vpmovsxwq), shifted (vpsllq $63) and tested
; (vptestmq) into %k0, then paired with a zeroed mask register via kunpckbw.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Memory-operand variant of the unmasked 8-word equality compare widened to i16.
; IR: %__a is bitcast to <8 x i16> and compared (icmp eq) against the <2 x i64>
; loaded from %__b; the shufflevector keeps lanes 0-7 and takes lanes 8-15 from
; the zeroinitializer operand, and the <16 x i1> is returned as i16.
; Expected code: with the AVX512BW+VL configuration the load is folded into
; vpcmpeqw (%rdi) writing %k0; with the AVX512F-only configuration the load is
; folded into the xmm vpcmpeqw, the result is widened and tested into %k0
; (vpmovsxwq/vpsllq/vptestmq) and paired with a zeroed mask via kunpckbw.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked word-equality compare of two 128-bit vectors, widened to a 16-bit mask.
; IR: %__a and %__b are bitcast to <8 x i16> and compared with icmp eq; the
; <8 x i1> result is ANDed with %__u (i8 bitcast to <8 x i1>). The
; shufflevector keeps lanes 0-7 and takes lanes 8-15 from the zeroinitializer
; operand, and the <16 x i1> is returned as i16.
; Expected code: with the AVX512BW+VL configuration a single masked vpcmpeqw
; into %k0 followed by kmovd; with the AVX512F-only configuration %edi is moved
; to %k1 and used as the write mask of the vptestmq that converts the widened
; xmm compare result, followed by kunpckbw with a zeroed mask register.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Memory-operand variant of the masked 8-word equality compare widened to i16.
; IR: %__a is bitcast to <8 x i16> and compared (icmp eq) against the <2 x i64>
; loaded from %__b; the <8 x i1> result is ANDed with %__u (i8 bitcast to
; <8 x i1>). The shufflevector keeps lanes 0-7 and takes lanes 8-15 from the
; zeroinitializer operand, and the <16 x i1> is returned as i16.
; Expected code: with the AVX512BW+VL configuration the load is folded into a
; masked vpcmpeqw (%rsi) writing %k0; with the AVX512F-only configuration the
; load is folded into the xmm vpcmpeqw and %edi (moved to %k1) masks the
; vptestmq, followed by kunpckbw with a zeroed mask register.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked word-equality compare of two 128-bit vectors, widened to a 32-bit mask.
; IR: %__a and %__b are bitcast to <8 x i16> and compared with icmp eq; the
; shufflevector keeps lanes 0-7 and fills lanes 8-31 from the zeroinitializer
; operand (indices 8-15, repeated three times), and the <32 x i1> is returned
; as i32.
; Expected code: with the AVX512BW+VL configuration a single vpcmpeqw into %k0
; followed by kmovd; with the AVX512F-only configuration the eight mask bits
; are extracted one at a time (kshiftlw/kshiftrw/kmovw), inserted with vpinsrb
; into a zeroed xmm register, converted back to a mask (vpmovsxbd/vpslld/
; vptestmd), stored to the stack and reloaded with movl.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi76:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi77:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi78:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Memory-operand variant of the unmasked 8-word equality compare widened to i32.
; IR: %__a is bitcast to <8 x i16> and compared (icmp eq) against the <2 x i64>
; loaded from %__b; the shufflevector keeps lanes 0-7 and fills lanes 8-31 from
; the zeroinitializer operand (indices 8-15, repeated three times), and the
; <32 x i1> is returned as i32.
; Expected code: with the AVX512BW+VL configuration the load is folded into
; vpcmpeqw (%rdi) writing %k0; with the AVX512F-only configuration the load is
; folded into the xmm vpcmpeqw, the eight mask bits are extracted one at a time
; (kshiftlw/kshiftrw/kmovw), inserted with vpinsrb into a zeroed xmm register,
; converted back to a mask, stored to the stack and reloaded with movl.
; The assertion lines were autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file).
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi79:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi80:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi81:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Test: lane-wise equality of two <8 x i16> register operands, ANDed with the
; 8-bit mask %__u, with the <8 x i1> result widened to 32 mask bits and
; returned as an i32.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     %edi is moved into %k1 and used as the write-mask of a single vpcmpeqw
;     into %k0; kmovd copies the result to %eax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %xmm0, the
;     mask from %edi is applied on the vptestmq, and the 8 mask bits are then
;     pulled out one at a time, reassembled with vpinsrb, and the final mask is
;     stored to the stack and reloaded as a 32-bit value.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi82:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi83:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi84:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask argument as 8 i1 lanes and apply it to the
; compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 take lanes
; of the zeroinitializer operand, so result lanes 8-31 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Test: lane-wise equality of two <8 x i16> vectors (second operand loaded
; from %__b), ANDed with the 8-bit mask %__u, with the <8 x i1> result widened
; to 32 mask bits and returned as an i32.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     %edi is moved into %k1 and used as the write-mask of a single vpcmpeqw
;     with a (%rsi) memory operand; kmovd copies %k0 to %eax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %xmm0, the
;     mask from %edi is applied on the vptestmq, and the 8 mask bits are then
;     pulled out one at a time, reassembled with vpinsrb, and the final mask is
;     stored to the stack and reloaded as a 32-bit value.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi85:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi86:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi87:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask argument as 8 i1 lanes and apply it to the
; compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 take lanes
; of the zeroinitializer operand, so result lanes 8-31 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Test: lane-wise equality of two <8 x i16> register operands, with the
; <8 x i1> result widened to 64 mask bits and returned as an i64.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     one vpcmpeqw writes %k0, and kmovq copies it to %rax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %xmm0 and
;     turned into a mask with vpmovsxwq/vpsllq/vptestmq; a zeroed mask register
;     is stored to three stack slots, the 8 mask bits are pulled out one at a
;     time and reassembled with vpinsrb, and the i64 result is built from two
;     32-bit stack loads combined with shlq $32 / orq.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi88:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi89:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi90:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; indices 8-15 take lanes of the
; zeroinitializer operand, so result lanes 8-63 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Test: lane-wise equality of two <8 x i16> vectors (second operand loaded
; from %__b), with the <8 x i1> result widened to 64 mask bits and returned
; as an i64.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     one vpcmpeqw with a (%rdi) memory operand writes %k0, and kmovq copies it
;     to %rax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %xmm0 and
;     turned into a mask with vpmovsxwq/vpsllq/vptestmq; a zeroed mask register
;     is stored to three stack slots, the 8 mask bits are pulled out one at a
;     time and reassembled with vpinsrb, and the i64 result is built from two
;     32-bit stack loads combined with shlq $32 / orq.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi91:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi92:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi93:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; indices 8-15 take lanes of the
; zeroinitializer operand, so result lanes 8-63 are always zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Test: lane-wise equality of two <8 x i16> register operands, ANDed with the
; 8-bit mask %__u, with the <8 x i1> result widened to 64 mask bits and
; returned as an i64.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     %edi is moved into %k1 and used as the write-mask of a single vpcmpeqw
;     into %k0; kmovq copies the result to %rax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %xmm0, the
;     mask from %edi is applied on the vptestmq, a zeroed mask register is
;     stored to three stack slots, the 8 mask bits are pulled out one at a time
;     and reassembled with vpinsrb, and the i64 result is built from two 32-bit
;     stack loads combined with shlq $32 / orq.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi94:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi95:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi96:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask argument as 8 i1 lanes and apply it to the
; compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 take lanes
; of the zeroinitializer operand, so result lanes 8-63 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Test: lane-wise equality of two <8 x i16> vectors (second operand loaded
; from %__b), ANDed with the 8-bit mask %__u, with the <8 x i1> result widened
; to 64 mask bits and returned as an i64.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     %edi is moved into %k1 and used as the write-mask of a single vpcmpeqw
;     with a (%rsi) memory operand; kmovq copies %k0 to %rax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %xmm0, the
;     mask from %edi is applied on the vptestmq, a zeroed mask register is
;     stored to three stack slots, the 8 mask bits are pulled out one at a time
;     and reassembled with vpinsrb, and the i64 result is built from two 32-bit
;     stack loads combined with shlq $32 / orq.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi97:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi98:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi99:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp eq <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask argument as 8 i1 lanes and apply it to the
; compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 take lanes
; of the zeroinitializer operand, so result lanes 8-63 are always zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Test: lane-wise equality of two <16 x i16> register operands, with the
; <16 x i1> result widened to 32 mask bits and returned as an i32.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     one ymm vpcmpeqw writes %k0, and kmovd copies it to %eax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %ymm0 and
;     turned into a mask with vpmovsxwd/vpslld/vptestmd; the 16 mask bits are
;     pulled out one at a time (kshiftlw/kshiftrw/kmovw) into GPRs, which is
;     why %rbx and %r12-%r15 are pushed and popped; they are reassembled with
;     vmovd/vpinsrb, and the final mask is stored to the stack and reloaded as
;     a 32-bit value.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi100:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi101:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi102:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi103:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi104:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi105:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi106:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi107:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp eq <16 x i16> %0, %1
|
|
; Shuffle indices 0-15 take the compare lanes; indices 16-31 take lanes of the
; zeroinitializer operand, so result lanes 16-31 are always zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Test: lane-wise equality of two <16 x i16> vectors (second operand loaded
; from %__b), with the <16 x i1> result widened to 32 mask bits and returned
; as an i32.
;   - Run with prefix VLX (avx512bw and avx512vl enabled in the RUN lines):
;     one ymm vpcmpeqw with a (%rdi) memory operand writes %k0, and kmovd
;     copies it to %eax.
;   - Run with prefix NoVLX (avx512f only): the compare is done in %ymm0 and
;     turned into a mask with vpmovsxwd/vpslld/vptestmd; the 16 mask bits are
;     pulled out one at a time (kshiftlw/kshiftrw/kmovw) into GPRs, which is
;     why %rbx and %r12-%r15 are pushed and popped; they are reassembled with
;     vmovd/vpinsrb, and the final mask is stored to the stack and reloaded as
;     a 32-bit value.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing them by hand.
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi108:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi109:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi110:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi111:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi112:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi113:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi114:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi115:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp eq <16 x i16> %0, %1
|
|
; Shuffle indices 0-15 take the compare lanes; indices 16-31 take lanes of the
; zeroinitializer operand, so result lanes 16-31 are always zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked equality compare of two 256-bit vectors viewed as <16 x i16>.
; The <16 x i1> compare result is ANDed with the i16 mask %__u, widened to
; <32 x i1> by a shufflevector whose upper 16 lanes come from a zero vector,
; and returned as an i32 bitmask.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
;  * +avx512bw,+avx512vl run - a single masked vpcmpeqw writes a k-register.
;  * +avx512f-only run - the compare is done in ymm, the 16 mask bits are
;    pulled out one at a time through GPRs, reassembled with vpinsrb,
;    re-tested, then spilled and reloaded as a 32-bit value.
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi119:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi120:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi121:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi122:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi123:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Lanes 0-15 take the masked compare result; indices 16-31 select the
  ; zeroinitializer operand, so the upper half of the 32-bit mask is zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Memory-operand variant of the masked <16 x i16> equality compare: the
; second operand is loaded from %__b. The <16 x i1> compare result is ANDed
; with the i16 mask %__u, widened to <32 x i1> with zero upper lanes, and
; returned as an i32 bitmask.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
;  * +avx512bw,+avx512vl run - the load is folded into one masked vpcmpeqw
;    that writes a k-register.
;  * +avx512f-only run - the load is folded into a ymm vpcmpeqw, the 16 mask
;    bits are pulled out one at a time through GPRs, reassembled with
;    vpinsrb, re-tested, then spilled and reloaded as a 32-bit value.
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi124:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi125:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi126:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi127:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi128:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi129:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi130:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi131:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Lanes 0-15 take the masked compare result; indices 16-31 select the
  ; zeroinitializer operand, so the upper half of the 32-bit mask is zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked equality compare of two 256-bit vectors viewed as <16 x i16>.
; The <16 x i1> compare result is widened to <64 x i1> by a shufflevector
; whose upper 48 lanes all come from a zero vector, and returned as an i64
; bitmask.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
;  * +avx512bw,+avx512vl run - a single vpcmpeqw writes a k-register that is
;    moved to %rax with kmovq.
;  * +avx512f-only run - the compare is done in ymm, the 16 mask bits are
;    pulled out one at a time through GPRs and reassembled with vpinsrb;
;    the 64-bit result is formed from two 32-bit stack reloads (shlq/orq).
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi132:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi133:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi134:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi135:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi136:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi137:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi138:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi139:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  ; Lanes 0-15 take the compare result. Every remaining index lies in 16-31
  ; (that run appears three times), which selects the zeroinitializer
  ; operand, so bits 16-63 of the returned mask are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Memory-operand variant of the unmasked <16 x i16> equality compare: the
; second operand is loaded from %__b. The <16 x i1> compare result is
; widened to <64 x i1> with all upper 48 lanes taken from a zero vector, and
; returned as an i64 bitmask.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
;  * +avx512bw,+avx512vl run - the load is folded into a single vpcmpeqw
;    that writes a k-register, moved to %rax with kmovq.
;  * +avx512f-only run - the load is folded into a ymm vpcmpeqw, the 16 mask
;    bits are pulled out one at a time through GPRs and reassembled with
;    vpinsrb; the 64-bit result is formed from two 32-bit stack reloads.
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi143:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi144:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi145:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi146:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi147:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  ; Lanes 0-15 take the compare result. Every remaining index lies in 16-31
  ; (that run appears three times), which selects the zeroinitializer
  ; operand, so bits 16-63 of the returned mask are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked equality compare of two 256-bit vectors viewed as <16 x i16>.
; The <16 x i1> compare result is ANDed with the i16 mask %__u, widened to
; <64 x i1> by a shufflevector whose upper 48 lanes all come from a zero
; vector, and returned as an i64 bitmask.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
;  * +avx512bw,+avx512vl run - a single masked vpcmpeqw writes a k-register
;    that is moved to %rax with kmovq.
;  * +avx512f-only run - the compare is done in ymm, the 16 mask bits are
;    pulled out one at a time through GPRs and reassembled with vpinsrb;
;    the 64-bit result is formed from two 32-bit stack reloads (shlq/orq).
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi148:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi149:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi150:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi151:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi152:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi153:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi154:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi155:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Lanes 0-15 take the masked compare result. Every remaining index lies in
  ; 16-31 (that run appears three times), which selects the zeroinitializer
  ; operand, so bits 16-63 of the returned mask are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi156:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi157:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi158:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi159:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi160:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi161:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi162:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi163:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp eq <16 x i16> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Test: equality compare of two 512-bit vectors viewed as <32 x i16>; the
; <32 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
; The assertions below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; With the VLX prefix the expected code is a single vpcmpeqw into a mask
; register followed by kmovq. With the NoVLX prefix the expected code
; performs the compare as two 256-bit vpcmpeqw operations, converts each
; half into mask bits (vpmovsxwd / vpslld / vptestmd), and assembles the
; 64-bit result through stack slots (shlq $32 / orq).
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi164:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi165:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi166:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm3, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm2, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm7, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm6, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm8, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
|
|
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
; IR under test: reinterpret both 512-bit arguments as 32 x i16 lanes.
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%1 = bitcast <8 x i64> %__b to <32 x i16>
|
|
; Lane-wise equality yields one i1 per 16-bit lane.
%2 = icmp eq <32 x i16> %0, %1
|
|
; Shuffle indices 0-31 select the compare result; indices 32-63 select from
; the zeroinitializer operand, so the upper 32 bits of the mask are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
; Pack the 64 mask bits into the scalar return value.
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Test: same <32 x i16> equality compare widened to a 64-bit mask, but the
; second operand is loaded from memory through the pointer argument %__b.
; The assertions below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; With the VLX prefix the expected code uses a memory operand directly in a
; single 512-bit vpcmpeqw, followed by kmovq. With the NoVLX prefix the
; expected code compares the two 256-bit halves separately against memory
; operands at offsets 32 and 0, converts each half into mask bits
; (vpmovsxwd / vpslld / vptestmd), and assembles the 64-bit result through
; stack slots (shlq $32 / orq).
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi167:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi168:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi169:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm2, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm1
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %eax, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
; IR under test: the first operand comes from the register argument, the
; second is loaded from the pointer argument before the compare.
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <32 x i16>
|
|
; Lane-wise equality yields one i1 per 16-bit lane.
%2 = icmp eq <32 x i16> %0, %1
|
|
; Shuffle indices 0-31 select the compare result; indices 32-63 select from
; the zeroinitializer operand, so the upper 32 bits of the mask are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
; Pack the 64 mask bits into the scalar return value.
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi170:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi171:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi172:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm2, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm3
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm3, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm6, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm7, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm5, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm8, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
|
|
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%1 = bitcast <8 x i64> %__b to <32 x i16>
|
|
%2 = icmp eq <32 x i16> %0, %1
|
|
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked equality compare of 32 x i16 lanes, second operand loaded from memory.
; The IR compares %__a against the vector loaded from %__b, ANDs the <32 x i1>
; result with the bits of %__u, pads it to 64 lanes with zeros and returns the
; 64 lanes as an i64.
; Under the VLX run the expected code is a single write-masked vpcmpeqw with a
; memory operand followed by kmovq. Under the NoVLX run the expected code does
; the compare as two 256-bit vpcmpeqw, on (%rsi) and 32(%rsi), and rebuilds the
; mask bits through vector registers and stack slots.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi173:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi174:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi175:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm1, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm3, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
|
|
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <32 x i16>
|
|
%2 = icmp eq <32 x i16> %0, %1
|
|
; Reinterpret the 32-bit scalar mask as one i1 per compared lane.
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
; Shuffle indices 32-63 select from the zeroinitializer operand, so lanes 32-63
; of the widened mask are zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked equality compare of 4 x i32 lanes, both operands in registers.
; The IR compares %__a and %__b as <4 x i32>, pads the <4 x i1> result to eight
; lanes with zeros and returns the eight lanes as an i8.
; Under the VLX run the expected code is one vpcmpeqd writing a mask register
; plus kmovd. Under the NoVLX run the expected code does the compare into an
; xmm register, pulls out bytes 0, 4, 8 and 12 with vpextrb, and builds the
; mask one lane at a time with vpermi2q / vpsllq / vptestmq.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp eq <4 x i32> %0, %1
|
|
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-7 of
; the widened mask are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unmasked equality compare of 4 x i32 lanes, second operand loaded from memory.
; The IR compares %__a against the vector loaded from %__b as <4 x i32>, pads
; the <4 x i1> result to eight lanes with zeros and returns it as an i8.
; Under the VLX run the expected code is one vpcmpeqd with a (%rdi) memory
; operand writing a mask register plus kmovd. Under the NoVLX run the expected
; code folds the load into an xmm vpcmpeqd, pulls out bytes 0, 4, 8 and 12 with
; vpextrb, and builds the mask one lane at a time with vpermi2q / vpsllq /
; vptestmq.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp eq <4 x i32> %0, %1
|
|
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-7 of
; the widened mask are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked equality compare of 4 x i32 lanes, both operands in registers.
; The IR compares %__a and %__b as <4 x i32>, ANDs the <4 x i1> result with the
; low four bits of %__u, pads it to eight lanes with zeros and returns it as
; an i8.
; Under the VLX run the expected code is kmovd of the mask followed by one
; write-masked vpcmpeqd and kmovd of the result. Under the NoVLX run the
; expected code splits %edi into four single-bit masks with kshiftlw/kshiftrw,
; inserts them into an xmm register, ANDs that with the xmm compare result, and
; then builds the final mask one lane at a time with vpermi2q / vpsllq /
; vptestmq.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp eq <4 x i32> %0, %1
|
|
; Reinterpret the 8-bit scalar mask as eight i1 lanes.
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only mask lanes 0-3 so the mask width matches the 4-lane compare.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Shuffle indices 4-7 select from the zeroinitializer operand, so lanes 4-7 of
; the widened mask are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked equality compare of two <4 x i32> vectors, the second one loaded from
; %__b. The low four bits of %__u gate the compare result, which is widened to
; eight lanes (upper four forced to zero) and returned as an i8 bitmask.
; With AVX512VL the checks expect a single write-masked vpcmpeqd into %k0; the
; AVX512F-only checks expect the mask to be assembled one lane at a time.
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Keep only lanes 0-3 of the caller-supplied mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are 0.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Unmasked equality compare of a <4 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all four lanes. The 4-lane result is widened to
; eight lanes (upper four forced to zero) and returned as an i8 bitmask.
; With AVX512VL the checks expect vpcmpeqd with a {1to4} embedded broadcast; the
; AVX512F-only checks expect an explicit vpbroadcastd followed by a lane-by-lane
; mask build.
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are 0.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Masked equality compare of a <4 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all four lanes. The low four bits of %__u gate the
; result, which is widened to eight lanes (upper four forced to zero) and
; returned as an i8 bitmask. Note the `and` takes the mask as its first operand
; here, which the AVX512F-only checks mirror in the vpand operand order.
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  ; Keep only lanes 0-3 of the caller-supplied mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are 0.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Unmasked equality compare of two <4 x i32> register operands. The 4-lane
; result is widened to sixteen lanes (lanes 4-15 forced to zero) and returned
; as an i16 bitmask.
; With AVX512VL the checks expect a single vpcmpeqd into %k0; the AVX512F-only
; checks expect the mask to be assembled one lane at a time with dword-element
; zmm operations.
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Unmasked equality compare of two <4 x i32> vectors, the second one loaded
; from %__b. The 4-lane result is widened to sixteen lanes (lanes 4-15 forced
; to zero) and returned as an i16 bitmask.
; With AVX512VL the checks expect vpcmpeqd with a folded memory operand writing
; %k0; the AVX512F-only checks expect the mask to be assembled one lane at a
; time.
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked equality compare of two <4 x i32> register operands. The low four
; bits of %__u gate the compare result, which is widened to sixteen lanes
; (lanes 4-15 forced to zero) and returned as an i16 bitmask.
; With AVX512VL the checks expect a single write-masked vpcmpeqd into %k0; the
; AVX512F-only checks expect the four mask bits to be moved into an xmm
; register, ANDed with the compare result, and the mask rebuilt lane by lane.
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Keep only lanes 0-3 of the caller-supplied mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked equality compare of two <4 x i32> vectors, the second one loaded from
; %__b. The low four bits of %__u gate the compare result, which is widened to
; sixteen lanes (lanes 4-15 forced to zero) and returned as an i16 bitmask.
; With AVX512VL the checks expect a write-masked vpcmpeqd with a folded memory
; operand; the AVX512F-only checks expect the mask to be rebuilt lane by lane.
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Keep only lanes 0-3 of the caller-supplied mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unmasked equality compare of a <4 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all four lanes. The 4-lane result is widened to
; sixteen lanes (lanes 4-15 forced to zero) and returned as an i16 bitmask.
; With AVX512VL the checks expect vpcmpeqd with a {1to4} embedded broadcast; the
; AVX512F-only checks expect an explicit vpbroadcastd followed by a lane-by-lane
; mask build.
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked equality compare of a <4 x i32> vector against a scalar i32 loaded
; from %__b and splatted to all four lanes. The low four bits of %__u gate the
; result, which is widened to sixteen lanes (lanes 4-15 forced to zero) and
; returned as an i16 bitmask. Note the `and` takes the mask as its first
; operand here, which the AVX512F-only checks mirror in the vpand operand order.
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  ; Keep only lanes 0-3 of the caller-supplied mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unmasked equality compare of two <4 x i32> register operands. The 4-lane
; result is widened to thirty-two lanes (lanes 4-31 forced to zero) and
; returned as an i32 bitmask.
; With AVX512VL the checks expect a single vpcmpeqd into %k0. The AVX512F-only
; checks expect a 32-byte-aligned stack frame: the compare bytes are compacted
; with vpshufb, each 16-lane half is turned into a mask with vptestmd and
; stored to the stack, and the 32-bit result is reloaded with one movl.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-31 are 0.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unmasked equality compare of two <4 x i32> vectors, the second one loaded
; from %__b. The 4-lane result is widened to thirty-two lanes (lanes 4-31
; forced to zero) and returned as an i32 bitmask.
; With AVX512VL the checks expect vpcmpeqd with a folded memory operand writing
; %k0. The AVX512F-only checks expect a 32-byte-aligned stack frame: the
; compare bytes are compacted with vpshufb, each 16-lane half is turned into a
; mask with vptestmd and stored to the stack, and the 32-bit result is reloaded
; with one movl.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi179:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi180:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi181:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-31 are 0.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Equality-compares %__a and %__b as <4 x i32>, then ANDs the 4-lane result
; with the low four lanes of %__u reinterpreted as <8 x i1>. The masked result
; is widened to 32 lanes (shuffle indices 4-7 select zeroinitializer lanes)
; and returned as an i32, so only its low four bits can be set.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi182:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi183:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi184:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Equality-compares %__a, viewed as <4 x i32>, against the <4 x i32> value
; loaded from %__b, then ANDs the 4-lane result with the low four lanes of
; %__u reinterpreted as <8 x i1>. The masked result is widened to 32 lanes
; (shuffle indices 4-7 select zeroinitializer lanes) and returned as an i32.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi185:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi186:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi187:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Broadcast form: loads one i32 from %__b, splats it across all four lanes,
; and equality-compares it with %__a viewed as <4 x i32>. The 4-lane result is
; widened to 32 lanes (shuffle indices 4-7 select zeroinitializer lanes) and
; returned as an i32, so only its low four bits can be set.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi188:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi189:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi190:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked broadcast form: loads one i32 from %__b, splats it across all four
; lanes, and equality-compares it with %__a viewed as <4 x i32>. The result is
; ANDed with the low four lanes of %__u reinterpreted as <8 x i1> (mask is the
; first operand of the `and` here), widened to 32 lanes with zeroinitializer
; lanes, and returned as an i32.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi191:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi192:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi193:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Equality-compares %__a and %__b as <4 x i32>. The 4-lane i1 result is
; widened to 64 lanes by a shuffle whose indices 4-7 select lanes of the
; zeroinitializer operand, so only the low four bits of the returned i64 can
; be set.
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi194:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi195:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi196:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Equality-compares %__a, viewed as <4 x i32>, against the <4 x i32> value
; loaded from %__b. The 4-lane i1 result is widened to 64 lanes by a shuffle
; whose indices 4-7 select lanes of the zeroinitializer operand, so only the
; low four bits of the returned i64 can be set.
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi197:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi198:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi199:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Equality-compares %__a and %__b as <4 x i32>, then ANDs the 4-lane result
; with the low four lanes of %__u reinterpreted as <8 x i1>. The masked result
; is widened to 64 lanes (shuffle indices 4-7 select zeroinitializer lanes)
; and returned as an i64, so only its low four bits can be set.
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Equality-compares %__a, viewed as <4 x i32>, against the <4 x i32> value
; loaded from %__b, then ANDs the 4-lane result with the low four lanes of
; %__u reinterpreted as <8 x i1>. The masked result is widened to 64 lanes
; (shuffle indices 4-7 select zeroinitializer lanes) and returned as an i64.
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi203:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi204:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi205:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Broadcast form: loads one i32 from %__b, splats it across all four lanes,
; and equality-compares it with %__a viewed as <4 x i32>. The 4-lane result is
; widened to 64 lanes (shuffle indices 4-7 select zeroinitializer lanes) and
; returned as an i64, so only its low four bits can be set.
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi206:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi207:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi208:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked broadcast form: loads one i32 from %__b, splats it across all four
; lanes, and equality-compares it with %__a viewed as <4 x i32>. The result is
; ANDed with the low four lanes of %__u reinterpreted as <8 x i1> (mask is the
; first operand of the `and` here), widened to 64 lanes with zeroinitializer
; lanes, and returned as an i64.
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi209:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi210:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi211:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Equality-compares %__a and %__b as <8 x i32>. The 8-lane i1 result is
; widened to 16 lanes by a shuffle whose indices 8-15 select lanes of the
; zeroinitializer operand, so only the low eight bits of the returned i16 can
; be set.
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp eq <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Equality-compares %__a, viewed as <8 x i32>, against the <8 x i32> value
; loaded from %__b. The 8-lane i1 result is widened to 16 lanes by a shuffle
; whose indices 8-15 select lanes of the zeroinitializer operand, so only the
; low eight bits of the returned i16 can be set.
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp eq <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked equality compare of two register operands viewed as <8 x i32>: the
; compare result is ANDed with %__u reinterpreted as <8 x i1>, widened to
; <16 x i1>, and returned as i16.
; Both sets of assertions expect %edi moved into %k1 and used as the write-mask
; of vpcmpeqd (ymm form for the VLX run, zmm form plus a
; kshiftlw $8 / kshiftrw $8 pair for the NoVLX run).
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 take lanes of
; the zeroinitializer operand, so the upper half of the widened mask is zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked equality compare of %__a (viewed as <8 x i32>) against a <4 x i64>
; loaded from %__b: the compare result is ANDed with %__u reinterpreted as
; <8 x i1>, widened to <16 x i1>, and returned as i16.
; The VLX assertions expect the load folded into a write-masked ymm vpcmpeqd.
; The NoVLX assertions expect a separate vmovdqa, a write-masked zmm vpcmpeqd,
; and a kshiftlw $8 / kshiftrw $8 pair on the mask register.
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 take lanes of
; the zeroinitializer operand, so the upper half of the widened mask is zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Equality compare of %__a (viewed as <8 x i32>) against a single i32 loaded
; from %__b and splatted to all 8 lanes (insertelement into lane 0 followed by a
; shufflevector whose mask is all zeros). The <8 x i1> result is widened to
; <16 x i1> and returned as i16.
; The VLX assertions expect a {1to8} broadcast memory operand on vpcmpeqd. The
; NoVLX assertions expect a separate vpbroadcastd, a zmm vpcmpeqd, and a
; kshiftlw $8 / kshiftrw $8 pair on the mask register.
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; indices 8-15 take lanes of the
; zeroinitializer operand, so the upper half of the widened mask is zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked equality compare of %__a (viewed as <8 x i32>) against a single i32
; loaded from %__b and splatted to all 8 lanes. The compare result is ANDed with
; %__u reinterpreted as <8 x i1> (note the operands of the `and` are in the
; order mask, compare here), widened to <16 x i1>, and returned as i16.
; The VLX assertions expect a write-masked vpcmpeqd with a {1to8} broadcast
; memory operand. The NoVLX assertions expect a separate vpbroadcastd, a
; write-masked zmm vpcmpeqd, and a kshiftlw $8 / kshiftrw $8 pair.
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Shuffle indices 0-7 take the masked compare lanes; indices 8-15 take lanes of
; the zeroinitializer operand, so the upper half of the widened mask is zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Equality compare of two register operands viewed as <8 x i32>, with the
; <8 x i1> result widened to <32 x i1> and returned as i32.
; The VLX assertions expect a single ymm vpcmpeqd followed by kmovd.
; The NoVLX assertions pin a much longer sequence: an %rbp frame with a
; 32-byte-aligned stack slot, a zmm vpcmpeqd, a zeroed mask (kxorw) stored to
; the stack, eight single-bit extractions (kshiftlw / kshiftrw $15 / kmovw)
; reassembled with vpinsrb, a vpmovsxbd / vpslld / vptestmd conversion back to
; a mask, and a final 32-bit reload of the result from (%rsp).
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi212:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi213:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi214:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; every other index is in 8-15 and
; takes a lane of the zeroinitializer operand, so lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Equality compare of %__a (viewed as <8 x i32>) against a <4 x i64> loaded
; from %__b, with the <8 x i1> result widened to <32 x i1> and returned as i32.
; The VLX assertions expect the load folded into a ymm vpcmpeqd, then kmovd.
; The NoVLX assertions pin a separate vmovdqa load, a zmm vpcmpeqd, and then
; the long expansion: an %rbp frame with a 32-byte-aligned stack slot, a zeroed
; mask (kxorw) stored to the stack, eight single-bit extractions reassembled
; with vpinsrb, vpmovsxbd / vpslld / vptestmd, and a 32-bit reload from (%rsp).
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi215:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi216:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi217:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; every other index is in 8-15 and
; takes a lane of the zeroinitializer operand, so lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked equality compare of two register operands viewed as <8 x i32>: the
; compare result is ANDed with %__u reinterpreted as <8 x i1>, widened to
; <32 x i1>, and returned as i32.
; The VLX assertions expect %edi moved to %k1 and used as the write-mask of a
; ymm vpcmpeqd, then kmovd.
; The NoVLX assertions pin a write-masked zmm vpcmpeqd followed by the long
; expansion: an %rbp frame with a 32-byte-aligned stack slot, a zeroed mask
; (kxorw) stored to the stack, eight single-bit extractions reassembled with
; vpinsrb, vpmovsxbd / vpslld / vptestmd, and a 32-bit reload from (%rsp).
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi218:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi219:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi220:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; every other index is in
; 8-15 and takes a lane of the zeroinitializer operand, so lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked equality compare of %__a (viewed as <8 x i32>) against a <4 x i64>
; loaded from %__b: the compare result is ANDed with %__u reinterpreted as
; <8 x i1>, widened to <32 x i1>, and returned as i32.
; The VLX assertions expect the load folded into a write-masked ymm vpcmpeqd.
; The NoVLX assertions pin a separate vmovdqa, a write-masked zmm vpcmpeqd, and
; the long expansion: an %rbp frame with a 32-byte-aligned stack slot, a zeroed
; mask (kxorw) stored to the stack, eight single-bit extractions reassembled
; with vpinsrb, vpmovsxbd / vpslld / vptestmd, and a 32-bit reload from (%rsp).
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi221:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi222:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi223:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 take the masked compare lanes; every other index is in
; 8-15 and takes a lane of the zeroinitializer operand, so lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Equality compare of %__a (viewed as <8 x i32>) against a single i32 loaded
; from %__b and splatted to all 8 lanes (insertelement into lane 0 followed by a
; shufflevector whose mask is all zeros). The <8 x i1> result is widened to
; <32 x i1> and returned as i32.
; The VLX assertions expect a {1to8} broadcast memory operand on vpcmpeqd.
; The NoVLX assertions pin a separate vpbroadcastd, a zmm vpcmpeqd, and the
; long expansion: an %rbp frame with a 32-byte-aligned stack slot, a zeroed
; mask (kxorw) stored to the stack, eight single-bit extractions reassembled
; with vpinsrb, vpmovsxbd / vpslld / vptestmd, and a 32-bit reload from (%rsp).
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi224:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi225:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi226:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; every other index is in 8-15 and
; takes a lane of the zeroinitializer operand, so lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked equality compare of %__a (viewed as <8 x i32>) against a single i32
; loaded from %__b and splatted to all 8 lanes. The compare result is ANDed with
; %__u reinterpreted as <8 x i1> (note the operands of the `and` are in the
; order mask, compare here), widened to <32 x i1>, and returned as i32.
; The VLX assertions expect a write-masked vpcmpeqd with a {1to8} broadcast
; memory operand.
; The NoVLX assertions pin a separate vpbroadcastd, a write-masked zmm
; vpcmpeqd, and the long expansion: an %rbp frame with a 32-byte-aligned stack
; slot, a zeroed mask (kxorw) stored to the stack, eight single-bit extractions
; reassembled with vpinsrb, vpmovsxbd / vpslld / vptestmd, and a 32-bit reload
; from (%rsp).
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi227:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi228:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi229:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Shuffle indices 0-7 take the masked compare lanes; every other index is in
; 8-15 and takes a lane of the zeroinitializer operand, so lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Equality compare of two register operands viewed as <8 x i32>, with the
; <8 x i1> result widened to <64 x i1> and returned as i64.
; The VLX assertions expect a single ymm vpcmpeqd followed by kmovq.
; The NoVLX assertions pin a long sequence: an %rbp frame with a 32-byte-aligned
; 64-byte stack area, a zmm vpcmpeqd, a zeroed mask (kxorw) stored to three
; stack slots, eight single-bit extractions (kshiftlw / kshiftrw $15 / kmovw)
; reassembled with vpinsrb, a vpmovsxbd / vpslld / vptestmd conversion back to
; a mask stored at (%rsp), and a final combine of two 32-bit stack loads with
; shlq $32 and orq.
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi230:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi231:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi232:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; every other index is in 8-15 and
; takes a lane of the zeroinitializer operand, so lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Equality compare of %__a (viewed as <8 x i32>) against a <4 x i64> loaded
; from %__b, with the <8 x i1> result widened to <64 x i1> and returned as i64.
; The VLX assertions expect the load folded into a ymm vpcmpeqd, then kmovq.
; The NoVLX assertions pin a separate vmovdqa load, a zmm vpcmpeqd, and the
; long expansion: an %rbp frame with a 32-byte-aligned 64-byte stack area, a
; zeroed mask (kxorw) stored to three stack slots, eight single-bit extractions
; reassembled with vpinsrb, vpmovsxbd / vpslld / vptestmd, and a final combine
; of two 32-bit stack loads with shlq $32 and orq.
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi233:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi234:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi235:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp eq <8 x i32> %0, %1
|
|
; Shuffle indices 0-7 take the compare lanes; every other index is in 8-15 and
; takes a lane of the zeroinitializer operand, so lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked register-register equality compare of two <8 x i32> vectors (bitcast
; from <4 x i64>). The <8 x i1> result is ANDed with the i8 mask %__u, widened
; to <64 x i1> by shuffling in elements of a zeroinitializer vector (mask
; indices 8..15 select the all-zero second operand), and returned as an i64.
;
; VLX prefix (RUN line with +avx512vl): a single masked ymm vpcmpeqd plus kmovq.
; NoVLX prefix (RUN line with +avx512f only): the compare is done on zmm and the
; low eight mask bits are extracted one by one and reassembled through the stack.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi236:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi237:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi238:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp eq <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked equality compare of an <8 x i32> vector (bitcast from <4 x i64> %__a)
; against a <4 x i64> value loaded from %__b. The <8 x i1> result is ANDed with
; the i8 mask %__u, widened to <64 x i1> by shuffling in elements of a
; zeroinitializer vector (mask indices 8..15 select the all-zero second
; operand), and returned as an i64.
;
; VLX prefix (RUN line with +avx512vl): the load is folded into a masked ymm
; vpcmpeqd. NoVLX prefix (RUN line with +avx512f only): the operand is loaded
; with vmovdqa, compared on zmm, and the low eight mask bits are extracted one
; by one and reassembled through the stack.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi239:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi240:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi241:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp eq <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked equality compare of an <8 x i32> vector (bitcast from <4 x i64>
; %__a) against a single i32 loaded from %__b and splatted to all eight lanes
; (insertelement + all-zero-index shufflevector). The <8 x i1> result is
; widened to <64 x i1> by shuffling in elements of a zeroinitializer vector
; (mask indices 8..15 select the all-zero second operand) and returned as i64.
;
; VLX prefix (RUN line with +avx512vl): the splat is folded into the compare as
; a {1to8} embedded broadcast. NoVLX prefix (RUN line with +avx512f only): a
; separate vpbroadcastd feeds a zmm compare, and the low eight mask bits are
; extracted one by one and reassembled through the stack.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi242:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi243:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi244:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT:    vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked equality compare of an <8 x i32> vector (bitcast from <4 x i64>
; %__a) against a single i32 loaded from %__b and splatted to all eight lanes
; (insertelement + all-zero-index shufflevector). The <8 x i1> result is ANDed
; with the i8 mask %__u (mask is the first `and` operand here), widened to
; <64 x i1> by shuffling in elements of a zeroinitializer vector (mask indices
; 8..15 select the all-zero second operand), and returned as an i64.
;
; VLX prefix (RUN line with +avx512vl): one masked vpcmpeqd with a {1to8}
; embedded broadcast. NoVLX prefix (RUN line with +avx512f only): a separate
; vpbroadcastd feeds a masked zmm compare, and the low eight mask bits are
; extracted one by one and reassembled through the stack.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi245:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi246:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi247:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT:    vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked register-register equality compare of two <16 x i32> vectors
; (bitcast from <8 x i64>). The <16 x i1> result is widened to <32 x i1> by
; shuffling in elements of a zeroinitializer vector (mask indices 16..31 select
; the all-zero second operand) and returned as an i32.
;
; VLX prefix (RUN line with +avx512bw among others): a zmm vpcmpeqd plus kmovd.
; NoVLX prefix (RUN line with +avx512f only): all sixteen mask bits are
; extracted one by one, which needs the callee-saved registers rbx and r12-r15,
; and the result is reassembled through the stack.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi248:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi249:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi250:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:  .Lcfi251:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi252:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi253:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi254:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi255:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp eq <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unmasked equality compare of a <16 x i32> vector (bitcast from <8 x i64>
; %__a) against an <8 x i64> value loaded from %__b. The <16 x i1> result is
; widened to <32 x i1> by shuffling in elements of a zeroinitializer vector
; (mask indices 16..31 select the all-zero second operand) and returned as i32.
;
; Both prefixes fold the load into a zmm vpcmpeqd. VLX (RUN line with
; +avx512bw among others) then moves the mask out with a single kmovd, while
; NoVLX (RUN line with +avx512f only) extracts all sixteen mask bits one by
; one, using the callee-saved registers rbx and r12-r15, and reassembles the
; result through the stack.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi256:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi257:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi258:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:  .Lcfi259:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi260:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi261:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi262:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi263:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqd (%rdi), %zmm0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp eq <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked register-register equality compare of two <16 x i32> vectors (bitcast
; from <8 x i64>). The <16 x i1> result is ANDed with the i16 mask %__u,
; widened to <32 x i1> by shuffling in elements of a zeroinitializer vector
; (mask indices 16..31 select the all-zero second operand), and returned as i32.
;
; Both prefixes load %__u into %k1 and issue a masked zmm vpcmpeqd. VLX (RUN
; line with +avx512bw among others) then moves the mask out with a single
; kmovd, while NoVLX (RUN line with +avx512f only) extracts all sixteen mask
; bits one by one, using the callee-saved registers rbx and r12-r15, and
; reassembles the result through the stack.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi264:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi265:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi266:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:  .Lcfi267:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi268:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi269:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi270:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi271:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp eq <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked equality compare of 16 x i32 lanes: %__a against a 512-bit vector
; loaded from %__b. The i16 %__u is ANDed into the compare result, which is
; then widened to <32 x i1> (lanes 16..31 zero) and returned as an i32.
; The +avx512vl/+avx512bw run expects a single masked vpcmpeqd with a memory
; operand plus one kmovd; the avx512f-only run expects the 16 mask bits to be
; extracted one at a time, rebuilt into a vector and stored through the stack.
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi272:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi273:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi274:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi275:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi276:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi277:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi278:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi279:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Reinterpret both 512-bit operands as 16 x i32 and compare lane-wise.
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp eq <16 x i32> %0, %1
  ; Apply the caller-supplied 16-bit write mask.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Indices 16..31 select from the zeroinitializer operand, so the upper half
  ; of the widened mask is all zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked equality compare of 16 x i32 lanes: %__a against a single i32
; loaded from %__b and splatted to all 16 lanes. The <16 x i1> result is
; widened to <32 x i1> (lanes 16..31 zero) and returned as an i32.
; Both runs expect the splat to fold into an embedded-broadcast {1to16}
; memory operand on vpcmpeqd.
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi280:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi281:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi282:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi283:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi284:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi285:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi286:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi287:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <16 x i32> %0, %1
  ; Indices 16..31 select from the zeroinitializer operand, so the upper half
  ; of the widened mask is all zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked equality compare of 16 x i32 lanes: %__a against a single i32 loaded
; from %__b and splatted to all 16 lanes. The i16 %__u is ANDed into the
; compare result, which is then widened to <32 x i1> (lanes 16..31 zero) and
; returned as an i32.
; Both runs expect a masked vpcmpeqd with an embedded-broadcast {1to16}
; memory operand.
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi288:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi289:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi290:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi291:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi292:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi293:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi294:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi295:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <16 x i32> %0, %1
  ; Apply the caller-supplied 16-bit write mask (mask is the first 'and'
  ; operand here, unlike the non-broadcast variants of this test).
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %3, %2
  ; Indices 16..31 select from the zeroinitializer operand, so the upper half
  ; of the widened mask is all zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked equality compare of 16 x i32 lanes of %__a and %__b (both passed
; in registers). The <16 x i1> result is widened to <64 x i1> with every lane
; above 15 zero, and returned as an i64.
; The +avx512vl/+avx512bw run expects one vpcmpeqd plus a 64-bit kmovq; the
; avx512f-only run expects the result to be assembled on the stack from
; 16-bit mask stores and combined with shlq/orq.
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi296:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi297:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi298:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi299:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi300:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi301:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi302:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi303:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Reinterpret both 512-bit operands as 16 x i32 and compare lane-wise.
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp eq <16 x i32> %0, %1
  ; Result lanes 16..63 all use indices in 16..31, i.e. the zeroinitializer
  ; operand, so only the low 16 bits of the i64 can be set.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unmasked equality compare of 16 x i32 lanes: %__a against a 512-bit vector
; loaded from %__b. The <16 x i1> result is widened to <64 x i1> with every
; lane above 15 zero, and returned as an i64.
; Both runs expect the load to fold into the vpcmpeqd memory operand.
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi304:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi305:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi306:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi307:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi308:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi309:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi310:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi311:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Reinterpret the register operand and the loaded operand as 16 x i32 and
  ; compare lane-wise.
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp eq <16 x i32> %0, %1
  ; Result lanes 16..63 all use indices in 16..31, i.e. the zeroinitializer
  ; operand, so only the low 16 bits of the i64 can be set.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked equality compare of 16 x i32 lanes of %__a and %__b (both passed in
; registers). The i16 %__u is ANDed into the compare result, which is then
; widened to <64 x i1> with every lane above 15 zero, and returned as an i64.
; The +avx512vl/+avx512bw run expects a masked vpcmpeqd plus a 64-bit kmovq;
; the avx512f-only run expects the result to be assembled on the stack from
; 16-bit mask stores and combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi312:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi313:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi314:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi315:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi316:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi317:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi318:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi319:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Reinterpret both 512-bit operands as 16 x i32 and compare lane-wise.
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp eq <16 x i32> %0, %1
  ; Apply the caller-supplied 16-bit write mask.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Result lanes 16..63 all use indices in 16..31, i.e. the zeroinitializer
  ; operand, so only the low 16 bits of the i64 can be set.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked 16-lane i32 equality compare of %__a against a vector loaded from
; %__b. The <16 x i1> result is ANDed with the i16 mask %__u, widened to 64
; lanes with zeros, and returned as an i64 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi320:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi321:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi322:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi323:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi324:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi325:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi326:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi327:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x i32>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <16 x i32>
|
|
%2 = icmp eq <16 x i32> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per compared lane.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 16-31 address the zeroinitializer operand, so result lanes
; 16-63 are all zero; only lanes 0-15 carry compare results.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked 16-lane i32 equality compare of %__a against a single i32 loaded
; from %__b and splatted to every lane. The <16 x i1> result is widened to 64
; lanes with zeros and returned as an i64 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi328:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi329:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi330:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi331:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi332:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi333:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi334:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi335:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Broadcast the loaded scalar: insert into lane 0, then splat lane 0 to all
; sixteen lanes with an all-zero shuffle mask.
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <16 x i32> %0, %1
|
|
; Shuffle indices 16-31 address the zeroinitializer operand, so result lanes
; 16-63 are all zero; only lanes 0-15 carry compare results.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked 16-lane i32 equality compare of %__a against a single i32 loaded from
; %__b and splatted to every lane. The <16 x i1> result is ANDed with the i16
; mask %__u, widened to 64 lanes with zeros, and returned as an i64 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi336:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi337:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi338:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi339:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi340:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi341:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi342:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi343:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Broadcast the loaded scalar: insert into lane 0, then splat lane 0 to all
; sixteen lanes with an all-zero shuffle mask.
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <16 x i32> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per compared lane.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %3, %2
|
|
; Shuffle indices 16-31 address the zeroinitializer operand, so result lanes
; 16-63 are all zero; only lanes 0-15 carry compare results.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked 2-lane i64 equality compare of %__a and %__b. The <2 x i1> result
; is widened to 4 lanes with zeros and returned as an i4 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The two bitcasts below are same-type (no-op) casts.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-3 are zero; only lanes 0-1 carry compare results.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Unmasked 2-lane i64 equality compare of %__a against a vector loaded from
; %__b. The <2 x i1> result is widened to 4 lanes with zeros and returned as
; an i4 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type (no-op) bitcast.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
; Same-type (no-op) bitcast.
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-3 are zero; only lanes 0-1 carry compare results.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked 2-lane i64 equality compare of %__a and %__b. The <2 x i1> result is
; ANDed with the low two bits of the i8 mask %__u, widened to 4 lanes with
; zeros, and returned as an i4 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The two bitcasts below are same-type (no-op) casts.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only mask lanes 0 and 1, matching the two compared lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-3 are zero; only lanes 0-1 carry compare results.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
; Masked 2-lane i64 equality compare of %__a against a vector loaded from
; %__b. The <2 x i1> result is ANDed with the low two bits of the i8 mask
; %__u, widened to 4 lanes with zeros, and returned as an i4 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type (no-op) bitcast.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
; Same-type (no-op) bitcast.
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only mask lanes 0 and 1, matching the two compared lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-3 are zero; only lanes 0-1 carry compare results.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
; Unmasked 2-lane i64 equality compare of %__a against a single i64 loaded
; from %__b and splatted to both lanes. The <2 x i1> result is widened to 4
; lanes with zeros and returned as an i4 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type (no-op) bitcast.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Broadcast the loaded scalar: insert into lane 0, then splat lane 0 to both
; lanes with an all-zero shuffle mask.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-3 are zero; only lanes 0-1 carry compare results.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked 2-lane i64 equality compare of %__a against a single i64 loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with the low
; two bits of the i8 mask %__u, widened to 4 lanes with zeros, and returned as
; an i4 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type (no-op) bitcast.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Broadcast the loaded scalar: insert into lane 0, then splat lane 0 to both
; lanes with an all-zero shuffle mask.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only mask lanes 0 and 1, matching the two compared lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-3 are zero; only lanes 0-1 carry compare results.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
; Unmasked 2-lane i64 equality compare of %__a and %__b. The <2 x i1> result
; is widened to 8 lanes with zeros and returned as an i8 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The two bitcasts below are same-type (no-op) casts.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-7 are zero; only lanes 0-1 carry compare results.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unmasked 2-lane i64 equality compare of %__a against a vector loaded from
; %__b. The <2 x i1> result is widened to 8 lanes with zeros and returned as
; an i8 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type (no-op) bitcast.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
; Same-type (no-op) bitcast.
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-7 are zero; only lanes 0-1 carry compare results.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked 2-lane i64 equality compare of %__a and %__b. The <2 x i1> result is
; ANDed with the low two bits of the i8 mask %__u, widened to 8 lanes with
; zeros, and returned as an i8 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The two bitcasts below are same-type (no-op) casts.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only mask lanes 0 and 1, matching the two compared lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-7 are zero; only lanes 0-1 carry compare results.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked 2-lane i64 equality compare of %__a against a vector loaded from
; %__b. The <2 x i1> result is ANDed with the low two bits of the i8 mask
; %__u, widened to 8 lanes with zeros, and returned as an i8 bitmask.
; The assertion lines below are autogenerated for both RUN configurations;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type (no-op) bitcast.
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
; Same-type (no-op) bitcast.
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only mask lanes 0 and 1, matching the two compared lanes.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Shuffle indices 2 and 3 address the zeroinitializer operand, so result
; lanes 2-7 are zero; only lanes 0-1 carry compare results.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked equality compare against a broadcast scalar, 2 lanes widened to an
; 8-bit mask.
; IR under test: load one i64 from %__b, splat it to both lanes (insertelement
; into lane 0, then shufflevector with indices <0, 0>), compare the splat for
; equality with %__a, widen the <2 x i1> result to <8 x i1> with a
; shufflevector whose lanes 2-7 all come from zeroinitializer, and return the
; result bitcast to i8.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a {1to2}
; broadcast memory operand. With +avx512f alone the checks expect an explicit
; vpbroadcastq, an xmm compare and a longer k-register/zmm sequence.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked equality compare against a broadcast scalar, 2 lanes widened to an
; 8-bit mask.
; IR under test: load one i64 from %__b, splat it to both lanes (insertelement
; into lane 0, then shufflevector with indices <0, 0>), compare the splat for
; equality with %__a, AND lanes 0-1 of %__u (viewed as <8 x i1>) with the
; <2 x i1> compare result (mask is the first `and` operand here), widen to
; <8 x i1> with a shufflevector whose lanes 2-7 all come from zeroinitializer,
; and return the result bitcast to i8.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a {1to2}
; broadcast memory operand, write-masked by %k1. With +avx512f alone the
; checks expect an explicit vpbroadcastq, an xmm compare and a longer
; k-register/zmm sequence.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked register-register equality compare, 2 lanes widened to a 16-bit
; mask.
; IR under test: compare %__a and %__b for equality as <2 x i64>, widen the
; <2 x i1> result to <16 x i1> with a shufflevector whose lanes 2-15 all come
; from zeroinitializer, and return the result bitcast to i16.
; Expected code when +avx512vl is enabled: a single vpcmpeqq writing %k0.
; With +avx512f alone the checks expect an xmm compare followed by a longer
; k-register/zmm sequence built on dword-granular operations.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked equality compare against a vector loaded from memory, 2 lanes
; widened to a 16-bit mask.
; IR under test: load a <2 x i64> from %__b, compare it for equality with
; %__a, widen the <2 x i1> result to <16 x i1> with a shufflevector whose
; lanes 2-15 all come from zeroinitializer, and return the result bitcast to
; i16.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a memory
; operand writing %k0. With +avx512f alone the checks expect an xmm compare
; (still with the memory operand) followed by a longer k-register/zmm
; sequence.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked register-register equality compare, 2 lanes widened to a 16-bit mask.
; IR under test: compare %__a and %__b for equality as <2 x i64>, AND the
; <2 x i1> result with lanes 0-1 of %__u (viewed as <8 x i1>), widen to
; <16 x i1> with a shufflevector whose lanes 2-15 all come from
; zeroinitializer, and return the result bitcast to i16.
; Expected code when +avx512vl is enabled: a single vpcmpeqq write-masked by
; %k1. With +avx512f alone the checks expect an xmm compare followed by a
; longer k-register/zmm sequence.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked equality compare against a vector loaded from memory, 2 lanes widened
; to a 16-bit mask.
; IR under test: load a <2 x i64> from %__b, compare it for equality with
; %__a, AND the <2 x i1> result with lanes 0-1 of %__u (viewed as <8 x i1>),
; widen to <16 x i1> with a shufflevector whose lanes 2-15 all come from
; zeroinitializer, and return the result bitcast to i16.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a memory
; operand, write-masked by %k1. With +avx512f alone the checks expect an xmm
; compare followed by a longer k-register/zmm sequence.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked equality compare against a broadcast scalar, 2 lanes widened to a
; 16-bit mask.
; IR under test: load one i64 from %__b, splat it to both lanes (insertelement
; into lane 0, then shufflevector with indices <0, 0>), compare the splat for
; equality with %__a, widen the <2 x i1> result to <16 x i1> with a
; shufflevector whose lanes 2-15 all come from zeroinitializer, and return the
; result bitcast to i16.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a {1to2}
; broadcast memory operand. With +avx512f alone the checks expect an explicit
; vpbroadcastq, an xmm compare and a longer k-register/zmm sequence.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked equality compare against a broadcast scalar, 2 lanes widened to a
; 16-bit mask.
; IR under test: load one i64 from %__b, splat it to both lanes (insertelement
; into lane 0, then shufflevector with indices <0, 0>), compare the splat for
; equality with %__a, AND lanes 0-1 of %__u (viewed as <8 x i1>) with the
; <2 x i1> compare result (mask is the first `and` operand here), widen to
; <16 x i1> with a shufflevector whose lanes 2-15 all come from
; zeroinitializer, and return the result bitcast to i16.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a {1to2}
; broadcast memory operand, write-masked by %k1. With +avx512f alone the
; checks expect an explicit vpbroadcastq, an xmm compare and a longer
; k-register/zmm sequence.
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked register-register equality compare, 2 lanes widened to a 32-bit
; mask.
; IR under test: compare %__a and %__b for equality as <2 x i64>, widen the
; <2 x i1> result to <32 x i1> with a shufflevector whose lanes 2-31 all come
; from zeroinitializer, and return the result bitcast to i32.
; Expected code when +avx512vl is enabled: a single vpcmpeqq writing %k0.
; With +avx512f alone the checks expect an %rbp frame with a 32-byte-aligned
; stack area: two 16-bit k-register halves are stored to the stack and the
; i32 result is read back with one movl from (%rsp).
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi344:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi345:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi346:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked equality compare against a vector loaded from memory, 2 lanes
; widened to a 32-bit mask.
; IR under test: load a <2 x i64> from %__b, compare it for equality with
; %__a, widen the <2 x i1> result to <32 x i1> with a shufflevector whose
; lanes 2-31 all come from zeroinitializer, and return the result bitcast to
; i32.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a memory
; operand writing %k0. With +avx512f alone the checks expect an %rbp frame
; with a 32-byte-aligned stack area: two 16-bit k-register halves are stored
; to the stack and the i32 result is read back with one movl from (%rsp).
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi347:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi348:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi349:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked register-register equality compare, 2 lanes widened to a 32-bit mask.
; IR under test: compare %__a and %__b for equality as <2 x i64>, AND the
; <2 x i1> result with lanes 0-1 of %__u (viewed as <8 x i1>), widen to
; <32 x i1> with a shufflevector whose lanes 2-31 all come from
; zeroinitializer, and return the result bitcast to i32.
; Expected code when +avx512vl is enabled: a single vpcmpeqq write-masked by
; %k1. With +avx512f alone the checks expect an %rbp frame with a
; 32-byte-aligned stack area: the mask bits are applied with a vpand, two
; 16-bit k-register halves are stored to the stack and the i32 result is read
; back with one movl from (%rsp).
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi350:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi351:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi352:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked equality compare against a vector loaded from memory, 2 lanes widened
; to a 32-bit mask.
; IR under test: load a <2 x i64> from %__b, compare it for equality with
; %__a, AND the <2 x i1> result with lanes 0-1 of %__u (viewed as <8 x i1>),
; widen to <32 x i1> with a shufflevector whose lanes 2-31 all come from
; zeroinitializer, and return the result bitcast to i32.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a memory
; operand, write-masked by %k1. With +avx512f alone the checks expect an %rbp
; frame with a 32-byte-aligned stack area: the mask bits are applied with a
; vpand, two 16-bit k-register halves are stored to the stack and the i32
; result is read back with one movl from (%rsp).
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi353:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi354:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi355:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unmasked equality compare against a broadcast scalar, 2 lanes widened to a
; 32-bit mask.
; IR under test: load one i64 from %__b, splat it to both lanes (insertelement
; into lane 0, then shufflevector with indices <0, 0>), compare the splat for
; equality with %__a, widen the <2 x i1> result to <32 x i1> with a
; shufflevector whose lanes 2-31 all come from zeroinitializer, and return the
; result bitcast to i32.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a {1to2}
; broadcast memory operand. With +avx512f alone the checks expect an explicit
; vpbroadcastq plus an %rbp frame with a 32-byte-aligned stack area: two
; 16-bit k-register halves are stored to the stack and the i32 result is read
; back with one movl from (%rsp).
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi356:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi357:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi358:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked equality compare against a broadcast scalar, 2 lanes widened to a
; 32-bit mask.
; IR under test: load one i64 from %__b, splat it to both lanes (insertelement
; into lane 0, then shufflevector with indices <0, 0>), compare the splat for
; equality with %__a, AND lanes 0-1 of %__u (viewed as <8 x i1>) with the
; <2 x i1> compare result (mask is the first `and` operand here), widen to
; <32 x i1> with a shufflevector whose lanes 2-31 all come from
; zeroinitializer, and return the result bitcast to i32.
; Expected code when +avx512vl is enabled: a single vpcmpeqq with a {1to2}
; broadcast memory operand, write-masked by %k1. With +avx512f alone the
; checks expect an explicit vpbroadcastq plus an %rbp frame with a
; 32-byte-aligned stack area: the mask bits are applied with a vpand, two
; 16-bit k-register halves are stored to the stack and the i32 result is read
; back with one movl from (%rsp).
; The check lines are tool-generated (see the note at the top of this file);
; regenerate them rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi359:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi360:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi361:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi362:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi363:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi364:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi365:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi366:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi367:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi368:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi369:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi370:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi371:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi372:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi373:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi374:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi375:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi376:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi377:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi378:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi379:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp eq <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Equality compare of two <4 x i64> register operands; the 4-bit result is
; widened to 8 lanes and returned as an i8 mask.
; With AVX512VL the checks expect one ymm vpcmpeqq writing a k-register.
; With only AVX512F the checks expect the compare result to be narrowed with
; vpmovqd and the mask assembled one element at a time via vpextrb,
; vpternlogq and vpermi2q.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp eq <4 x i64> %0, %1
|
|
; Widen to 8 lanes: indices 0-3 take the compare bits, indices 4-7 select
; the zeroinitializer operand, so mask bits 4..7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Same <4 x i64> equality compare, but the second operand is loaded from the
; pointer %__b; the 4-bit result is widened to an i8 mask.
; With AVX512VL the checks expect the load folded into vpcmpeqq as a (%rdi)
; memory operand writing a k-register; with only AVX512F the load is folded
; into the ymm compare and the mask is assembled element by element.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp eq <4 x i64> %0, %1
|
|
; Widen to 8 lanes: indices 0-3 take the compare bits, indices 4-7 select
; the zeroinitializer operand, so mask bits 4..7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked equality compare of two <4 x i64> register operands: the compare
; result is ANDed with the low four bits of %__u and widened to an i8 mask.
; With AVX512VL the checks expect %__u moved into k1 and used as the write
; mask of a single ymm vpcmpeqq; with only AVX512F the four mask bits are
; peeled out with kshift pairs, inserted into an xmm register with vpinsrb,
; applied with vpand, and the final mask is assembled element by element.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp eq <4 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 8 lanes: indices 0-3 take the masked compare bits, indices 4-7
; select the zeroinitializer operand, so mask bits 4..7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked equality compare where the second <4 x i64> operand is loaded from
; %__b: the compare result is ANDed with the low four bits of %__u and widened
; to an i8 mask.
; With AVX512VL the checks expect a single k1-masked ymm vpcmpeqq with a
; (%rsi) memory operand; with only AVX512F the mask bits are applied with
; vpand and the final mask is assembled element by element.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp eq <4 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 8 lanes: indices 0-3 take the masked compare bits, indices 4-7
; select the zeroinitializer operand, so mask bits 4..7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Broadcast form: a single i64 is loaded from %__b, splatted to all four
; lanes (insertelement + shufflevector <0, 0, 0, 0>) and compared for equality
; with %__a; the 4-bit result is widened to an i8 mask.
; With AVX512VL the checks expect the splat folded into vpcmpeqq as a {1to4}
; embedded broadcast; with only AVX512F a separate vpbroadcastq is expected
; before the ymm compare.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <4 x i64> %0, %1
|
|
; Widen to 8 lanes: indices 0-3 take the compare bits, indices 4-7 select
; the zeroinitializer operand, so mask bits 4..7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked equality compare of the four i64 lanes of %__a against a scalar that
; is loaded from %__b and splatted to every lane. The per-lane result is ANDed
; with the low four bits of %__u, widened to eight i1 lanes (upper lanes taken
; from zeroinitializer) and returned as an i8.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcast: %0 is just %__a.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Load one i64 and splat it to all four lanes (insert into lane 0, then
  ; shuffle with an all-zero index mask).
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  ; Only the first four lanes of the i8 mask take part in the compare.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 of the
  ; widened result are always false.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Equality compare of the four i64 lanes of %__a and %__b. The four i1
; results are widened to sixteen lanes (all added lanes taken from
; zeroinitializer) and returned as an i16.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 and %1 are just %__a and %__b.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-15
  ; of the widened result are always false.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Equality compare of the four i64 lanes of %__a against a <4 x i64> vector
; loaded from %__b. The four i1 results are widened to sixteen lanes (all
; added lanes taken from zeroinitializer) and returned as an i16.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-15
  ; of the widened result are always false.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked equality compare of the four i64 lanes of %__a and %__b. The per-lane
; result is ANDed with the low four bits of %__u, widened to sixteen i1 lanes
; (all added lanes taken from zeroinitializer) and returned as an i16.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 and %1 are just %__a and %__b.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Only the first four lanes of the i8 mask take part in the compare.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-15
  ; of the widened result are always false.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked equality compare of the four i64 lanes of %__a against a <4 x i64>
; vector loaded from %__b. The per-lane result is ANDed with the low four bits
; of %__u, widened to sixteen i1 lanes (all added lanes taken from
; zeroinitializer) and returned as an i16.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Only the first four lanes of the i8 mask take part in the compare.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-15
  ; of the widened result are always false.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Equality compare of the four i64 lanes of %__a against a scalar that is
; loaded from %__b and splatted to every lane. The four i1 results are widened
; to sixteen lanes (all added lanes taken from zeroinitializer) and returned
; as an i16.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcast: %0 is just %__a.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Load one i64 and splat it to all four lanes (insert into lane 0, then
  ; shuffle with an all-zero index mask).
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-15
  ; of the widened result are always false.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked equality compare of the four i64 lanes of %__a against a scalar that
; is loaded from %__b and splatted to every lane. The per-lane result is ANDed
; with the low four bits of %__u, widened to sixteen i1 lanes (all added lanes
; taken from zeroinitializer) and returned as an i16.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcast: %0 is just %__a.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Load one i64 and splat it to all four lanes (insert into lane 0, then
  ; shuffle with an all-zero index mask).
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  ; Only the first four lanes of the i8 mask take part in the compare.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-15
  ; of the widened result are always false.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Equality compare of the four i64 lanes of %__a and %__b. The four i1
; results are widened to thirty-two lanes (all added lanes taken from
; zeroinitializer) and returned as an i32.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi380:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi381:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi382:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 and %1 are just %__a and %__b.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-31
  ; of the widened result are always false.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Equality compare of the four i64 lanes of %__a against a <4 x i64> vector
; loaded from %__b. The four i1 results are widened to thirty-two lanes (all
; added lanes taken from zeroinitializer) and returned as an i32.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi383:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi384:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi385:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-31
  ; of the widened result are always false.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked equality compare of the four i64 lanes of %__a and %__b. The per-lane
; result is ANDed with the low four bits of %__u, widened to thirty-two i1
; lanes (all added lanes taken from zeroinitializer) and returned as an i32.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi386:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi387:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi388:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 and %1 are just %__a and %__b.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Only the first four lanes of the i8 mask take part in the compare.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-31
  ; of the widened result are always false.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked equality compare of the four i64 lanes of %__a against a <4 x i64>
; vector loaded from %__b. The per-lane result is ANDed with the low four bits
; of %__u, widened to thirty-two i1 lanes (all added lanes taken from
; zeroinitializer) and returned as an i32.
; The assertions below are autogenerated for the two llc runs declared at the
; top of the file (prefixes VLX and NoVLX); regenerate them rather than
; editing them by hand.
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi389:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi390:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi391:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Same-type bitcasts: %0 is %__a and %1 is the loaded vector.
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Only the first four lanes of the i8 mask take part in the compare.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index above 3 refers to the zeroinitializer operand, so lanes 4-31
  ; of the widened result are always false.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked 256-bit equality compare of <4 x i64> against a scalar i64 loaded
; from memory and splatted to all four lanes; the <4 x i1> result is
; zero-extended to 32 mask bits and returned as an i32.
;   %__a - first compare operand.
;   %__b - pointer to the i64 that is broadcast as the second operand.
; The VLX checks expect the load folded as a {1to4} broadcast operand; the
; NoVLX checks expect a separate vpbroadcastq followed by a ymm compare.
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi392:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi393:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi394:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..31 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked 256-bit equality compare of <4 x i64> against a scalar i64 loaded
; from memory and splatted to all four lanes; the <4 x i1> result is
; zero-extended to 32 mask bits and returned as an i32.
;   %__u - i8 write-mask; only its low four lanes are used (see %extract.i).
;   %__a - first compare operand.
;   %__b - pointer to the i64 that is broadcast as the second operand.
; The VLX checks expect one masked vpcmpeqq with a {1to4} broadcast operand;
; the NoVLX checks expect vpbroadcastq, a ymm compare and a rebuilt mask.
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi395:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi396:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi397:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only the low four mask lanes so they line up with the 4-lane compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..31 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked 256-bit equality compare of two <4 x i64> register operands; the
; <4 x i1> result is zero-extended to 64 mask bits and returned as an i64.
;   %__a - first compare operand.
;   %__b - second compare operand.
; The VLX checks expect vpcmpeqq into %k0 read back with kmovq; the NoVLX
; checks expect a ymm compare and the i64 assembled from 32-bit stack halves.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi398:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi399:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi400:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..63 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unmasked 256-bit equality compare of <4 x i64> against a full vector loaded
; from memory; the <4 x i1> result is zero-extended to 64 mask bits and
; returned as an i64.
;   %__a - first compare operand.
;   %__b - pointer to the second compare operand.
; The VLX checks expect the load folded into vpcmpeqq and a kmovq readback;
; the NoVLX checks expect a ymm compare and the i64 assembled from 32-bit
; stack halves.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi401:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi402:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi403:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..63 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked 256-bit equality compare of two <4 x i64> register operands; the
; <4 x i1> result is zero-extended to 64 mask bits and returned as an i64.
;   %__u - i8 write-mask; only its low four lanes are used (see %extract.i).
;   %__a - first compare operand.
;   %__b - second compare operand.
; The VLX checks expect one masked vpcmpeqq into %k0 read back with kmovq;
; the NoVLX checks expect a ymm compare, a rebuilt mask and the i64 assembled
; from 32-bit stack halves.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi404:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi405:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi406:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only the low four mask lanes so they line up with the 4-lane compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..63 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked 256-bit equality compare of <4 x i64> against a full vector loaded
; from memory; the <4 x i1> result is zero-extended to 64 mask bits and
; returned as an i64.
;   %__u - i8 write-mask; only its low four lanes are used (see %extract.i).
;   %__a - first compare operand.
;   %__b - pointer to the second compare operand.
; The VLX checks expect one masked vpcmpeqq with a folded load and a kmovq
; readback; the NoVLX checks expect a ymm compare, a rebuilt mask and the i64
; assembled from 32-bit stack halves.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi407:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi408:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi409:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only the low four mask lanes so they line up with the 4-lane compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..63 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked 256-bit equality compare of <4 x i64> against a scalar i64 loaded
; from memory and splatted to all four lanes; the <4 x i1> result is
; zero-extended to 64 mask bits and returned as an i64.
;   %__a - first compare operand.
;   %__b - pointer to the i64 that is broadcast as the second operand.
; The VLX checks expect the load folded as a {1to4} broadcast operand; the
; NoVLX checks expect vpbroadcastq, a ymm compare and the i64 assembled from
; 32-bit stack halves.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi410:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi411:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi412:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..63 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked 256-bit equality compare of <4 x i64> against a scalar i64 loaded
; from memory and splatted to all four lanes; the <4 x i1> result is
; zero-extended to 64 mask bits and returned as an i64.
;   %__u - i8 write-mask; only its low four lanes are used (see %extract.i).
;   %__a - first compare operand.
;   %__b - pointer to the i64 that is broadcast as the second operand.
; The VLX checks expect one masked vpcmpeqq with a {1to4} broadcast operand;
; the NoVLX checks expect vpbroadcastq, a ymm compare, a rebuilt mask and the
; i64 assembled from 32-bit stack halves.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi413:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi414:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi415:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only the low four mask lanes so they line up with the 4-lane compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 pick lanes of the zeroinitializer operand, so result lanes
  ; 4..63 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked 512-bit equality compare of two <8 x i64> register operands; the
; <8 x i1> result is zero-extended to 16 mask bits and returned as an i16.
;   %__a - first compare operand.
;   %__b - second compare operand.
; Both check prefixes expect a single zmm vpcmpeqq into %k0; they differ only
; in the mask readback (kmovd versus kmovw).
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 pick lanes of the zeroinitializer operand, so result lanes
  ; 8..15 are zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Unmasked 512-bit equality compare of <8 x i64> against a full vector loaded
; from memory; the <8 x i1> result is zero-extended to 16 mask bits and
; returned as an i16.
;   %__a - first compare operand.
;   %__b - pointer to the second compare operand.
; Both check prefixes expect the load folded into a single zmm vpcmpeqq; they
; differ only in the mask readback (kmovd versus kmovw).
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 pick lanes of the zeroinitializer operand, so result lanes
  ; 8..15 are zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked 512-bit equality compare of two <8 x i64> register operands; the
; <8 x i1> result is zero-extended to 16 mask bits and returned as an i16.
;   %__u - i8 write-mask, one bit per compare lane.
;   %__a - first compare operand.
;   %__b - second compare operand.
; Both check prefixes expect a single masked zmm vpcmpeqq; they differ only in
; the kmov width used to move the mask in and out.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 pick lanes of the zeroinitializer operand, so result lanes
  ; 8..15 are zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked 512-bit equality compare of <8 x i64> against a full vector loaded
; from memory; the <8 x i1> result is zero-extended to 16 mask bits and
; returned as an i16.
;   %__u - i8 write-mask, one bit per compare lane.
;   %__a - first compare operand.
;   %__b - pointer to the second compare operand.
; Both check prefixes expect a single masked zmm vpcmpeqq with the load
; folded; they differ only in the kmov width used for the mask.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 8-15 pick lanes of the zeroinitializer operand, so result lanes
  ; 8..15 are zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unmasked 512-bit equality compare of <8 x i64> against a scalar i64 loaded
; from memory and splatted to all eight lanes; the <8 x i1> result is
; zero-extended to 16 mask bits and returned as an i16.
;   %__a - first compare operand.
;   %__b - pointer to the i64 that is broadcast as the second operand.
; Both check prefixes expect the load folded as a {1to8} broadcast operand;
; they differ only in the mask readback (kmovd versus kmovw).
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 8-15 pick lanes of the zeroinitializer operand, so result lanes
  ; 8..15 are zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked 512-bit equality compare of <8 x i64> against a scalar i64 loaded
; from memory and splatted to all eight lanes; the <8 x i1> result is
; zero-extended to 16 mask bits and returned as an i16.
;   %__u - i8 write-mask, one bit per compare lane.
;   %__a - first compare operand.
;   %__b - pointer to the i64 that is broadcast as the second operand.
; Both check prefixes expect a single masked zmm vpcmpeqq with a {1to8}
; broadcast operand; they differ only in the kmov width used for the mask.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  ; Indices 8-15 pick lanes of the zeroinitializer operand, so result lanes
  ; 8..15 are zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unmasked 64-bit equality compare of two <8 x i64> register operands. The
; <8 x i1> result is widened to <32 x i1> with zero upper lanes and returned
; as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi416:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi417:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi418:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 take the compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unmasked 64-bit equality compare of %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <32 x i1> with zero
; upper lanes and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi419:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi420:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi421:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 take the compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked 64-bit equality compare of two <8 x i64> register operands. The
; compare result is ANDed with the i8 mask %__u (reinterpreted as <8 x i1>),
; widened to <32 x i1> with zero upper lanes, and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi422:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi423:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi424:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied lane mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked 64-bit equality compare of %__a against a full <8 x i64> vector
; loaded from %__b. The compare result is ANDed with the i8 mask %__u
; (reinterpreted as <8 x i1>), widened to <32 x i1> with zero upper lanes,
; and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi425:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi426:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi427:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied lane mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked 64-bit equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes. The <8 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi428:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi429:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi430:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar across all eight lanes.
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 take the compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked 64-bit equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes. The compare result is ANDed with the
; i8 mask %__u (reinterpreted as <8 x i1>), widened to <32 x i1> with zero
; upper lanes, and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi431:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi432:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi433:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar across all eight lanes.
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied lane mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 take the masked compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked 64-bit equality compare of two <8 x i64> register operands. The
; <8 x i1> result is widened to <64 x i1> with zero upper lanes and returned
; as an i64 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi434:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi435:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi436:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 take the compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unmasked 64-bit equality compare of %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <64 x i1> with zero
; upper lanes and returned as an i64 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi437:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi438:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi439:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Indices 0-7 take the compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked 64-bit equality compare of two <8 x i64> register operands. The
; compare result is ANDed with the i8 mask %__u (reinterpreted as <8 x i1>),
; widened to <64 x i1> with zero upper lanes, and returned as an i64 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi440:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi441:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi442:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  ; Apply the caller-supplied lane mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the masked compare lanes; every index in 8-15 takes a zero lane from the second operand.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked 8 x i64 equality compare against a vector loaded from %__b, with the
; 8-lane result widened to a 64-bit mask.
;   %__u - i8 reinterpreted as <8 x i1> and ANDed with the compare result
;   %__a - first compare operand
;   %__b - pointer the second <8 x i64> operand is loaded from
; Returns the <64 x i1> shuffle result bitcast to i64.
; Checks under the VLX prefix expect one masked vpcmpeqq with a memory operand
; followed by kmovq. Checks under the NoVLX prefix expect the mask bits to be
; pulled out one at a time (kshiftlw/kshiftrw/kmovw), re-packed with vpinsrb,
; converted back to a mask and combined with zeroed stack slots into %rax.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi443:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi444:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi445:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp eq <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Result lanes 0-7 come from %4; every later lane uses an index in 8-15,
; which selects from the zeroinitializer operand.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes; the 8-lane result is widened to a
; 64-bit mask.
;   %__a - vector compare operand
;   %__b - pointer to the scalar that is broadcast before the compare
; Returns the <64 x i1> shuffle result bitcast to i64.
; Checks under the VLX prefix expect a single vpcmpeqq using the {1to8}
; broadcast memory form followed by kmovq. Checks under the NoVLX prefix expect
; the same broadcast compare, then per-bit extraction (kshiftlw/kshiftrw/kmovw),
; re-packing with vpinsrb and assembly of the 64-bit value via stack slots.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi446:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi447:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi448:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; all eight lanes.
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <8 x i64> %0, %1
|
|
; Result lanes 0-7 come from %2; every later lane uses an index in 8-15,
; which selects from the zeroinitializer operand.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked 8 x i64 equality compare of %__a against a scalar i64 loaded from
; %__b and splatted to all eight lanes; the 8-lane result is widened to a
; 64-bit mask.
;   %__u - i8 reinterpreted as <8 x i1> and ANDed with the compare result
;   %__a - vector compare operand
;   %__b - pointer to the scalar that is broadcast before the compare
; Returns the <64 x i1> shuffle result bitcast to i64.
; Checks under the VLX prefix expect one masked vpcmpeqq using the {1to8}
; broadcast memory form followed by kmovq. Checks under the NoVLX prefix expect
; the same masked broadcast compare, then per-bit extraction
; (kshiftlw/kshiftrw/kmovw), re-packing with vpinsrb and assembly of the
; 64-bit value via stack slots.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi449:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi450:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi451:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
; all eight lanes.
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp eq <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Here the mask is the first operand of the 'and' and the compare result the second.
%4 = and <8 x i1> %3, %2
|
|
; Result lanes 0-7 come from %4; every later lane uses an index in 8-15,
; which selects from the zeroinitializer operand.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of sixteen i8 lanes (%__a > %__b, both
; bitcast from <2 x i64>), with the 16-lane result widened to a 32-bit mask.
;   %__a - left-hand compare operand
;   %__b - right-hand compare operand
; Returns the <32 x i1> shuffle result bitcast to i32.
; Checks under the VLX prefix expect vpcmpgtb writing %k0 followed by kmovd.
; Checks under the NoVLX prefix expect vpcmpgtb into an xmm register, a
; vpmovsxbd/vpslld/vptestmd conversion to a mask, per-bit extraction
; (kshiftlw/kshiftrw/kmovw) into general registers including pushed
; %rbx and %r12-%r15, re-packing with vpinsrb, and a final store/reload
; through (%rsp) into %eax.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi452:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi453:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi454:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi455:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi456:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi457:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi458:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi459:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret both 128-bit operands as sixteen i8 lanes.
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
; Result lanes 0-15 come from %2; lanes 16-31 use indices 16-31, which
; select from the zeroinitializer operand.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked signed greater-than compare of sixteen i8 lanes, where the
; right-hand operand is loaded from %__b; the 16-lane result is widened to a
; 32-bit mask.
;   %__a - left-hand compare operand (bitcast from <2 x i64>)
;   %__b - pointer the right-hand <2 x i64> operand is loaded from
; Returns the <32 x i1> shuffle result bitcast to i32.
; Checks under the VLX prefix expect vpcmpgtb with a memory operand writing
; %k0, followed by kmovd. Checks under the NoVLX prefix expect vpcmpgtb with a
; memory operand into an xmm register, a vpmovsxbd/vpslld/vptestmd conversion
; to a mask, per-bit extraction (kshiftlw/kshiftrw/kmovw), re-packing with
; vpinsrb, and a final store/reload through (%rsp) into %eax.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi460:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi461:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi462:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi463:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi464:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi465:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi466:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi467:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret %__a and the loaded vector as sixteen i8 lanes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
; Result lanes 0-15 come from %2; lanes 16-31 use indices 16-31, which
; select from the zeroinitializer operand.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of sixteen i8 lanes (%__a > %__b, both
; bitcast from <2 x i64>), with the 16-lane result widened to a 32-bit mask.
;   %__u - i16 reinterpreted as <16 x i1> and ANDed with the compare result
;   %__a - left-hand compare operand
;   %__b - right-hand compare operand
; Returns the <32 x i1> shuffle result bitcast to i32.
; Checks under the VLX prefix expect kmovd of %edi into %k1, a masked vpcmpgtb
; writing %k0, and kmovd into %eax. Checks under the NoVLX prefix expect
; vpcmpgtb into an xmm register, vpmovsxbd/vpslld, a vptestmd masked by %k1
; (loaded from %edi), per-bit extraction (kshiftlw/kshiftrw/kmovw), re-packing
; with vpinsrb, and a final store/reload through (%rsp) into %eax.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi468:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi469:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi470:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi471:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi472:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi473:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi474:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi475:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret both 128-bit operands as sixteen i8 lanes.
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Result lanes 0-15 come from %4; lanes 16-31 use indices 16-31, which
; select from the zeroinitializer operand.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of sixteen i8 lanes, where the
; right-hand operand is loaded from %__b; the 16-lane result is widened to a
; 32-bit mask.
;   %__u - i16 reinterpreted as <16 x i1> and ANDed with the compare result
;   %__a - left-hand compare operand (bitcast from <2 x i64>)
;   %__b - pointer the right-hand <2 x i64> operand is loaded from
; Returns the <32 x i1> shuffle result bitcast to i32.
; Checks under the VLX prefix expect kmovd of %edi into %k1, a masked vpcmpgtb
; with a memory operand writing %k0, and kmovd into %eax. Checks under the
; NoVLX prefix expect vpcmpgtb with a memory operand into an xmm register,
; vpmovsxbd/vpslld, a vptestmd masked by %k1 (loaded from %edi), per-bit
; extraction (kshiftlw/kshiftrw/kmovw), re-packing with vpinsrb, and a final
; store/reload through (%rsp) into %eax.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi476:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi477:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi478:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi479:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi480:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi481:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi482:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi483:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret %__a and the loaded vector as sixteen i8 lanes each.
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Result lanes 0-15 come from %4; lanes 16-31 use indices 16-31, which
; select from the zeroinitializer operand.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed byte greater-than compare of two 128-bit register operands, with the
; 16-lane result widened to a 64-bit mask.
;
; IR shape: both <2 x i64> arguments are reinterpreted as <16 x i8> and
; compared with icmp sgt. The <16 x i1> result is widened to <64 x i1> by a
; shufflevector whose second operand is zeroinitializer; result lanes 0-15 take
; the compare bits and every later lane uses an index in 16..31, i.e. a zero
; lane, so only the low 16 bits of the returned i64 can be set.
;
; Expected codegen: the run with avx512bw/vl/dq enabled compares straight into
; a mask register and moves it out with kmovq. The avx512f-only run compares in
; an xmm register, converts the lanes to a 16-bit mask, rebuilds it bit by bit
; and assembles the i64 through stack slots.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi484:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi485:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi486:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi487:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi488:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi489:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi490:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi491:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Memory-operand variant of the signed byte greater-than compare widened to a
; 64-bit mask: the right-hand operand is loaded from %__b.
;
; IR shape: %__a and the loaded <2 x i64> are reinterpreted as <16 x i8> and
; compared with icmp sgt. The <16 x i1> result is widened to <64 x i1> by a
; shufflevector with zeroinitializer as second operand; lanes 16-63 all use
; indices in 16..31 (zero lanes), so only the low 16 bits of the returned i64
; can be set.
;
; Expected codegen: both runs fold the load into vpcmpgtb as a (%rdi) operand.
; The run with avx512bw/vl/dq writes a mask register directly; the
; avx512f-only run compares in an xmm register and rebuilds the mask through
; scalar registers and stack slots.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi492:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi493:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi494:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi495:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi496:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi497:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi498:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi499:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed byte greater-than compare of two 128-bit register operands,
; with the 16-lane result widened to a 64-bit mask.
;
; IR shape: both <2 x i64> arguments are reinterpreted as <16 x i8> and
; compared with icmp sgt. The i16 argument %__u is bitcast to <16 x i1> and
; ANDed with the compare result. The <16 x i1> value is then widened to
; <64 x i1> by a shufflevector with zeroinitializer as second operand; lanes
; 16-63 all use indices in 16..31 (zero lanes), so only the low 16 bits of the
; returned i64 can be set.
;
; Expected codegen: the run with avx512bw/vl/dq moves %edi into %k1 and uses it
; as the write mask of vpcmpgtb. The avx512f-only run applies %k1 as the write
; mask of vptestmd instead and then rebuilds the mask through scalar registers
; and stack slots.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi500:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi501:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi502:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi503:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi504:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi505:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi506:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi507:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked, memory-operand variant of the signed byte greater-than compare
; widened to a 64-bit mask: the right-hand operand is loaded from %__b.
;
; IR shape: %__a and the loaded <2 x i64> are reinterpreted as <16 x i8> and
; compared with icmp sgt. The i16 argument %__u is bitcast to <16 x i1> and
; ANDed with the compare result. The <16 x i1> value is then widened to
; <64 x i1> by a shufflevector with zeroinitializer as second operand; lanes
; 16-63 all use indices in 16..31 (zero lanes), so only the low 16 bits of the
; returned i64 can be set.
;
; Expected codegen: both runs fold the load into vpcmpgtb as a (%rsi) operand
; and move %edi into %k1. The run with avx512bw/vl/dq uses %k1 as the write
; mask of vpcmpgtb; the avx512f-only run uses it as the write mask of vptestmd
; and then rebuilds the mask through scalar registers and stack slots.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi508:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi509:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi510:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi511:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi512:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi513:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi514:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi515:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp sgt <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed byte greater-than compare of two 256-bit register operands, with the
; 32-lane result widened to a 64-bit mask.
;
; IR shape: both <4 x i64> arguments are reinterpreted as <32 x i8> and
; compared with icmp sgt. The <32 x i1> result is widened to <64 x i1> by a
; shufflevector with zeroinitializer as second operand; lanes 0-31 take the
; compare bits and lanes 32-63 use indices 32..63 (zero lanes), so only the low
; 32 bits of the returned i64 can be set.
;
; Expected codegen: the run with avx512bw/vl/dq compares straight into a mask
; register. The avx512f-only run compares in a ymm register, splits it with
; vextracti128, converts each 128-bit half to a 16-bit mask that is stored to
; the stack, and assembles the i64 from stack loads.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi516:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi517:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi518:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%1 = bitcast <4 x i64> %__b to <32 x i8>
|
|
%2 = icmp sgt <32 x i8> %0, %1
|
|
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Memory-operand variant of the 256-bit signed byte greater-than compare
; widened to a 64-bit mask: the right-hand operand is loaded from %__b.
;
; IR shape: %__a and the loaded <4 x i64> are reinterpreted as <32 x i8> and
; compared with icmp sgt. The <32 x i1> result is widened to <64 x i1> by a
; shufflevector with zeroinitializer as second operand; lanes 32-63 use indices
; 32..63 (zero lanes), so only the low 32 bits of the returned i64 can be set.
;
; Expected codegen: both runs fold the load into vpcmpgtb as a (%rdi) operand.
; The run with avx512bw/vl/dq writes a mask register directly. The
; avx512f-only run compares in a ymm register, splits it with vextracti128,
; converts each half to a 16-bit mask stored to the stack, and assembles the
; i64 from stack loads.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi519:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi520:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi521:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <32 x i8>
|
|
%2 = icmp sgt <32 x i8> %0, %1
|
|
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed byte greater-than compare of two 256-bit register operands,
; with the 32-lane result widened to a 64-bit mask.
;
; IR shape: both <4 x i64> arguments are reinterpreted as <32 x i8> and
; compared with icmp sgt. The i32 argument %__u is bitcast to <32 x i1> and
; ANDed with the compare result. The <32 x i1> value is then widened to
; <64 x i1> by a shufflevector with zeroinitializer as second operand; lanes
; 32-63 use indices 32..63 (zero lanes), so only the low 32 bits of the
; returned i64 can be set.
;
; Expected codegen: the run with avx512bw/vl/dq moves %edi into %k1 and uses it
; as the write mask of vpcmpgtb. The avx512f-only run stores %edi to the stack,
; reloads it with two kmovw loads into %k1 and %k2, turns each into a byte
; vector (vpternlogd + vpmovdb), ANDs those with the two 128-bit halves of the
; ymm compare result, and assembles the i64 from stack loads.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi522:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi523:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi524:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
|
|
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%1 = bitcast <4 x i64> %__b to <32 x i8>
|
|
%2 = icmp sgt <32 x i8> %0, %1
|
|
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked, memory-operand variant of the 256-bit signed byte greater-than
; compare widened to a 64-bit mask: the right-hand operand is loaded from %__b.
;
; IR shape: %__a and the loaded <4 x i64> are reinterpreted as <32 x i8> and
; compared with icmp sgt. The i32 argument %__u is bitcast to <32 x i1> and
; ANDed with the compare result. The <32 x i1> value is then widened to
; <64 x i1> by a shufflevector with zeroinitializer as second operand; lanes
; 32-63 use indices 32..63 (zero lanes), so only the low 32 bits of the
; returned i64 can be set.
;
; Expected codegen: both runs fold the load into vpcmpgtb as a (%rsi) operand.
; The run with avx512bw/vl/dq moves %edi into %k1 and uses it as the write
; mask of the compare. The avx512f-only run stores %edi to the stack, reloads
; it with two kmovw loads into %k1 and %k2, turns each into a byte vector
; (vpternlogd + vpmovdb), ANDs those with the two 128-bit halves of the ymm
; compare result, and assembles the i64 from stack loads.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi525:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi526:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi527:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
|
|
; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
|
|
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <32 x i8>
|
|
%2 = icmp sgt <32 x i8> %0, %1
|
|
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed 16-bit-element greater-than compare of two 128-bit register operands,
; with the 8-lane result widened to a 16-bit mask.
;
; IR shape: both <2 x i64> arguments are reinterpreted as <8 x i16> and
; compared with icmp sgt. The <8 x i1> result is widened to <16 x i1> by a
; shufflevector with zeroinitializer as second operand; lanes 0-7 take the
; compare bits and lanes 8-15 use indices 8..15 (zero lanes), so only the low
; 8 bits of the returned i16 can be set.
;
; Expected codegen: the run with avx512bw/vl/dq compares straight into a mask
; register with vpcmpgtw. The avx512f-only run compares in an xmm register,
; sign-extends the lanes to 64 bits, derives a mask with vpsllq + vptestmq, and
; combines it with a zeroed mask register via kxorw + kunpckbw.
;
; The assertion lines below are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Memory-operand variant: the right-hand side of the signed greater-than
; compare is loaded from %__b. The <8 x i1> result is widened to <16 x i1>
; with zero upper lanes (shuffle indices 8-15 select zeroinitializer) and
; returned as i16. Both runs expect the load folded into vpcmpgtw as (%rdi).
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked variant: the <8 x i1> signed greater-than result is AND-ed with the
; i8 mask %__u (bitcast to <8 x i1>), widened to <16 x i1> with zero upper
; lanes, and returned as i16. The VLX run expects the mask moved into %k1 and
; applied as a write-mask on vpcmpgtw; the NoVLX run expects it applied as a
; write-mask on vptestmq.
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked, memory-operand variant: the right-hand side is loaded from %__b,
; and the <8 x i1> signed greater-than result is AND-ed with the i8 mask
; %__u, widened to <16 x i1> with zero upper lanes, and returned as i16.
; Both runs expect the load folded into vpcmpgtw as (%rsi), with the mask
; arriving in %edi.
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of eight i16 lanes, both operands in registers.
; The <8 x i1> result is widened to <32 x i1>: shuffle indices 0-7 keep the
; compare bits and every index >= 8 selects the zeroinitializer operand.
; The result is returned as i32.
; The VLX run expects one vpcmpgtw writing %k0. The NoVLX run expects a
; 32-byte-aligned stack frame, the mask bits moved one at a time through
; GPRs (kshiftlw/kshiftrw/kmovw), rebuilt as bytes with vpinsrb, re-tested
; with vptestmd, and the 32-bit result read back from (%rsp).
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi528:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi529:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi530:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Memory-operand variant: the right-hand side of the signed greater-than
; compare is loaded from %__b. The <8 x i1> result is widened to <32 x i1>
; (every shuffle index >= 8 selects the zeroinitializer operand) and
; returned as i32. Both runs expect the load folded into vpcmpgtw as (%rdi).
; The NoVLX run additionally expects the per-bit kshiftlw/kshiftrw/kmovw
; extraction, the vpinsrb rebuild, and a read of the result from (%rsp).
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi531:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi532:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi533:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked variant: the <8 x i1> signed greater-than result is AND-ed with the
; i8 mask %__u (bitcast to <8 x i1>), widened to <32 x i1> (every shuffle
; index >= 8 selects the zeroinitializer operand) and returned as i32.
; The VLX run expects the mask moved into %k1 and applied as a write-mask on
; vpcmpgtw. The NoVLX run expects it applied on vptestmq, followed by the
; per-bit mask extraction, the vpinsrb rebuild, and a stack round-trip.
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi534:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi535:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi536:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked, memory-operand variant: the right-hand side is loaded from %__b,
; and the <8 x i1> signed greater-than result is AND-ed with the i8 mask
; %__u, widened to <32 x i1> (every shuffle index >= 8 selects the
; zeroinitializer operand) and returned as i32.
; Both runs expect the load folded into vpcmpgtw as (%rsi), with the mask
; arriving in %edi. The NoVLX run additionally expects the per-bit mask
; extraction, the vpinsrb rebuild, and a read of the result from (%rsp).
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi537:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi538:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi539:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of eight i16 lanes, both operands in registers.
; The <8 x i1> result is widened to <64 x i1>: shuffle indices 0-7 keep the
; compare bits and every index >= 8 selects the zeroinitializer operand.
; The result is returned as i64.
; The VLX run expects one vpcmpgtw writing %k0, read out with kmovq. The
; NoVLX run expects a 64-byte frame with three zeroed mask slots, the
; per-bit extraction and vpinsrb rebuild, then the i64 assembled from two
; 32-bit stack loads joined by shlq $32 and orq.
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi540:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi541:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi542:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Memory-operand variant: the right-hand side of the signed greater-than
; compare is loaded from %__b. The <8 x i1> result is widened to <64 x i1>
; (every shuffle index >= 8 selects the zeroinitializer operand) and
; returned as i64. Both runs expect the load folded into vpcmpgtw as (%rdi).
; The NoVLX run additionally expects a 64-byte frame, the per-bit mask
; extraction and vpinsrb rebuild, and the i64 assembled from two 32-bit
; stack loads joined by shlq $32 and orq.
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi543:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi544:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi545:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked variant: the <8 x i1> signed greater-than result is AND-ed with the
; i8 mask %__u (bitcast to <8 x i1>), widened to <64 x i1> (every shuffle
; index >= 8 selects the zeroinitializer operand) and returned as i64.
; The VLX run expects the mask moved into %k1 and applied as a write-mask on
; vpcmpgtw, with the result read out by kmovq. The NoVLX run expects the
; mask applied on vptestmq, then the per-bit extraction, the vpinsrb
; rebuild, and the i64 assembled from two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi546:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi547:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi548:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked, memory-operand variant: the right-hand side is loaded from %__b,
; and the <8 x i1> signed greater-than result is AND-ed with the i8 mask
; %__u, widened to <64 x i1> (every shuffle index >= 8 selects the
; zeroinitializer operand) and returned as i64.
; Both runs expect the load folded into vpcmpgtw as (%rsi), with the mask
; arriving in %edi. The NoVLX run additionally expects the per-bit mask
; extraction, the vpinsrb rebuild, and the i64 assembled from two 32-bit
; stack loads joined by shlq $32 and orq.
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi549:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi550:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi551:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sgt <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of two <16 x i16> vectors (passed as <4 x i64>
; and bitcast), with the 16-bit result mask widened to 32 bits and returned
; as an i32. Shuffle indices 16-31 select from the zeroinitializer operand,
; so the upper 16 bits of the returned value are zero.
; The +avx512bw,+avx512vl configuration is expected to emit a single vpcmpgtw
; into a mask register; the +avx512f-only configuration is expected to emit
; the expanded per-bit sequence recorded below.
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi552:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi553:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi554:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi555:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi556:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi557:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi558:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi559:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Memory-operand variant: signed greater-than compare of a <16 x i16> vector
; against a second vector loaded through %__b, with the 16-bit result mask
; widened to 32 bits and returned as an i32. Shuffle indices 16-31 select
; from the zeroinitializer operand, so the upper 16 bits of the returned
; value are zero.
; The +avx512bw,+avx512vl configuration is expected to fold the load into a
; single vpcmpgtw that writes a mask register; the +avx512f-only
; configuration is expected to emit the expanded per-bit sequence recorded
; below.
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi560:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi561:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi562:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi563:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi564:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi565:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi566:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi567:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked variant: signed greater-than compare of two <16 x i16> vectors, with
; the result ANDed lane-wise against the i16 %__u reinterpreted as
; <16 x i1>. The 16-bit mask is then widened to 32 bits and returned as an
; i32. Shuffle indices 16-31 select from the zeroinitializer operand, so the
; upper 16 bits of the returned value are zero.
; The +avx512bw,+avx512vl configuration is expected to emit one vpcmpgtw
; under write-mask %k1; the +avx512f-only configuration is expected to emit
; the expanded per-bit sequence recorded below.
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi568:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi569:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi570:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi571:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi572:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi573:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi574:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi575:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked memory-operand variant: signed greater-than compare of a <16 x i16>
; vector against a second vector loaded through %__b, with the result ANDed
; lane-wise against the i16 %__u reinterpreted as <16 x i1>. The 16-bit mask
; is then widened to 32 bits and returned as an i32. Shuffle indices 16-31
; select from the zeroinitializer operand, so the upper 16 bits of the
; returned value are zero.
; The +avx512bw,+avx512vl configuration is expected to emit one vpcmpgtw with
; a folded load under write-mask %k1; the +avx512f-only configuration is
; expected to emit the expanded per-bit sequence recorded below.
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi576:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi577:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi578:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi579:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi580:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi581:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi582:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi583:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed greater-than compare of two <16 x i16> vectors (passed as <4 x i64>
; and bitcast), with the 16-bit result mask widened to 64 bits and returned
; as an i64. Every shuffle index from position 16 onwards is in the range
; 16-31, which selects from the zeroinitializer operand, so the upper 48 bits
; of the returned value are zero.
; The +avx512bw,+avx512vl configuration is expected to emit a single vpcmpgtw
; into a mask register followed by kmovq; the +avx512f-only configuration is
; expected to emit the expanded per-bit sequence recorded below.
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi584:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi585:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi586:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi587:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi588:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi589:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi590:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi591:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Memory-operand variant: signed greater-than compare of a <16 x i16> vector
; against a second vector loaded through %__b, with the 16-bit result mask
; widened to 64 bits and returned as an i64. Every shuffle index from
; position 16 onwards is in the range 16-31, which selects from the
; zeroinitializer operand, so the upper 48 bits of the returned value are
; zero.
; The +avx512bw,+avx512vl configuration is expected to fold the load into a
; single vpcmpgtw that writes a mask register, followed by kmovq; the
; +avx512f-only configuration is expected to emit the expanded per-bit
; sequence recorded below.
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi592:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi593:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi594:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi595:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi596:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi597:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi598:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi599:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed greater-than compare of sixteen i16 lanes, returned as an i64 mask.
; %__a and %__b are reinterpreted as <16 x i16> and compared with icmp sgt; the
; <16 x i1> result is ANDed with the bits of %__u. The shufflevector then builds a
; <64 x i1> vector whose lanes 0-15 come from that result and whose lanes 16-63
; all come from zeroinitializer, and that vector is bitcast to the i64 return value.
; The assertions inside the function are autogenerated (see the NOTE at the top of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi600:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi601:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi602:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi603:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi604:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi605:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi606:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi607:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Reinterpret both 256-bit operands as sixteen i16 lanes and compare them.
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  ; Apply the caller-supplied 16-bit mask lane by lane.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 select from %4; indices 16-31 select from the all-zero second operand.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Memory-operand variant of the masked signed greater-than compare of sixteen i16
; lanes. %__a is reinterpreted as <16 x i16>; the right-hand operand is loaded
; through the pointer %__b and reinterpreted the same way. The icmp sgt result is
; ANDed with the bits of %__u, then widened by a shufflevector to <64 x i1> whose
; lanes 16-63 all come from zeroinitializer, and finally bitcast to the i64 result.
; The assertions inside the function are autogenerated (see the NOTE at the top of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi608:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi609:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi610:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi611:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi612:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi613:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi614:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi615:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Left operand comes from the register argument, right operand from memory.
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp sgt <16 x i16> %0, %1
  ; Apply the caller-supplied 16-bit mask lane by lane.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 select from %4; indices 16-31 select from the all-zero second operand.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of thirty-two i16 lanes, returned as an i64
; mask. %__a and %__b are reinterpreted as <32 x i16> and compared with icmp sgt.
; The shufflevector then builds a <64 x i1> vector whose lanes 0-31 come from the
; compare result and whose lanes 32-63 come from zeroinitializer, and that vector
; is bitcast to the i64 return value.
; The assertions inside the function are autogenerated (see the NOTE at the top of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi616:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi617:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi618:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Reinterpret both 512-bit operands as thirty-two i16 lanes and compare them.
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %1 = bitcast <8 x i64> %__b to <32 x i16>
  %2 = icmp sgt <32 x i16> %0, %1
  ; Indices 0-31 select from %2; indices 32-63 select from the all-zero second operand.
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi619:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi620:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi621:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm2, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm1
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %eax, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <32 x i16>
|
|
%2 = icmp sgt <32 x i16> %0, %1
|
|
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of 32 x i16 lanes, register operands.
; The IR reinterprets %__a and %__b as <32 x i16>, computes `icmp sgt`, ANDs
; the 32 result bits with the bits of %__u, widens the mask to 64 lanes by
; shuffling in zeroinitializer for lanes 32-63, and returns it as an i64.
; The first RUN line (+avx512bw,+avx512vl) expects one masked vpcmpgtw into a
; k-register; the second RUN line (+avx512f only) expects the long expansion
; recorded below.
; The assertions are autogenerated by utils/update_llc_test_checks.py; do not
; edit them by hand, regenerate them instead.
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi622:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi623:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi624:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %1 = bitcast <8 x i64> %__b to <32 x i16>
  %2 = icmp sgt <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked signed greater-than compare of 32 x i16 lanes, memory operand.
; The IR reinterprets %__a as <32 x i16>, loads the second operand through
; %__b and reinterprets it the same way, computes `icmp sgt`, ANDs the 32
; result bits with the bits of %__u, widens the mask to 64 lanes by shuffling
; in zeroinitializer for lanes 32-63, and returns it as an i64.
; The first RUN line (+avx512bw,+avx512vl) expects one masked vpcmpgtw with
; the load folded in as (%rsi); the second RUN line (+avx512f only) expects
; the long expansion recorded below, which compares against (%rsi) and
; 32(%rsi) separately.
; The assertions are autogenerated by utils/update_llc_test_checks.py; do not
; edit them by hand, regenerate them instead.
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi625:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi626:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi627:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp sgt <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of two <4 x i32> vectors (both <2 x i64>
; arguments are bitcast first). The <4 x i1> result is widened to <8 x i1> with
; zero upper lanes and returned as an i8 bitmask.
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Widen to 8 lanes: mask indices 4-7 select from the zeroinitializer operand,
; so the upper four lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unmasked signed greater-than compare of %__a against a <2 x i64> vector
; loaded from %__b (both bitcast to <4 x i32>). The <4 x i1> result is widened
; to <8 x i1> with zero upper lanes and returned as an i8 bitmask.
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Widen to 8 lanes: mask indices 4-7 select from the zeroinitializer operand,
; so the upper four lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of two <4 x i32> vectors (both <2 x i64>
; arguments are bitcast first). The <4 x i1> compare result is ANDed with
; lanes 0-3 of %__u viewed as <8 x i1>, widened to <8 x i1> with zero upper
; lanes, and returned as an i8 bitmask.
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the <8 x i1> write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 8 lanes: mask indices 4-7 select from the zeroinitializer operand,
; so the upper four lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of %__a against a <2 x i64> vector loaded
; from %__b (both bitcast to <4 x i32>). The <4 x i1> compare result is ANDed
; with lanes 0-3 of %__u viewed as <8 x i1>, widened to <8 x i1> with zero
; upper lanes, and returned as an i8 bitmask.
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the <8 x i1> write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 8 lanes: mask indices 4-7 select from the zeroinitializer operand,
; so the upper four lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of %__a (bitcast to <4 x i32>) against
; a scalar i32 loaded from %__b and splatted to all four lanes. The <4 x i1>
; result is widened to <8 x i1> with zero upper lanes and returned as an i8
; bitmask.
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar across all four lanes (insert into lane 0, then
; shuffle with an all-zero index mask).
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Widen to 8 lanes: mask indices 4-7 select from the zeroinitializer operand,
; so the upper four lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of %__a (bitcast to <4 x i32>) against a
; scalar i32 loaded from %__b and splatted to all four lanes. Lanes 0-3 of
; %__u viewed as <8 x i1> are ANDed with the <4 x i1> compare result (mask is
; the first `and` operand here), widened to <8 x i1> with zero upper lanes,
; and returned as an i8 bitmask.
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar across all four lanes (insert into lane 0, then
; shuffle with an all-zero index mask).
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the <8 x i1> write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Widen to 8 lanes: mask indices 4-7 select from the zeroinitializer operand,
; so the upper four lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of two <4 x i32> vectors (both <2 x i64>
; arguments are bitcast first). The <4 x i1> result is widened to <16 x i1>
; with zero upper lanes and returned as an i16 bitmask.
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Widen to 16 lanes: every mask index >= 4 (the repeated 4-7 groups) selects
; from the zeroinitializer operand, so lanes 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked signed greater-than compare of %__a against a <2 x i64> vector
; loaded from %__b (both bitcast to <4 x i32>). The <4 x i1> result is widened
; to <16 x i1> with zero upper lanes and returned as an i16 bitmask.
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Widen to 16 lanes: every mask index >= 4 (the repeated 4-7 groups) selects
; from the zeroinitializer operand, so lanes 4-15 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of two <4 x i32> vectors (both <2 x i64>
; arguments are bitcast first). The <4 x i1> compare result is ANDed with
; lanes 0-3 of %__u viewed as <8 x i1>, widened to <16 x i1> with zero upper
; lanes, and returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the <8 x i1> write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 16 lanes: every mask index >= 4 (the repeated 4-7 groups) selects
; from the zeroinitializer operand, so lanes 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of %__a against a <2 x i64> vector loaded
; from %__b (both bitcast to <4 x i32>). The <4 x i1> compare result is ANDed
; with lanes 0-3 of %__u viewed as <8 x i1>, widened to <16 x i1> with zero
; upper lanes, and returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the <8 x i1> write mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 16 lanes: every mask index >= 4 (the repeated 4-7 groups) selects
; from the zeroinitializer operand, so lanes 4-15 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of %__a (viewed as <4 x i32>) against a single
; i32 loaded from %__b and splatted across all four lanes. The <4 x i1> result
; is widened to <16 x i1> and returned as an i16 bitmask.
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Mask indices 0-3 take the compare lanes; indices 4-7 take lanes of the
; zeroinitializer operand, so every result lane above lane 3 is false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked variant: signed greater-than compare of %__a (viewed as <4 x i32>)
; against a single i32 loaded from %__b and splatted across all four lanes.
; The <4 x i1> result is ANDed with lanes 0-3 of %__u (viewed as <8 x i1>),
; widened to <16 x i1> and returned as an i16 bitmask.
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Mask indices 0-3 take the masked compare lanes; indices 4-7 take lanes of
; the zeroinitializer operand, so every result lane above lane 3 is false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of %__a against %__b, both viewed as <4 x i32>.
; The <4 x i1> result is widened to <32 x i1> and returned as an i32 bitmask.
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi628:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi629:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi630:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Mask indices 0-3 take the compare lanes; indices 4-7 take lanes of the
; zeroinitializer operand, so every result lane above lane 3 is false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Signed greater-than compare of %__a against a <2 x i64> vector loaded from
; %__b, both viewed as <4 x i32>. The <4 x i1> result is widened to <32 x i1>
; and returned as an i32 bitmask.
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi631:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi632:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi633:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Mask indices 0-3 take the compare lanes; indices 4-7 take lanes of the
; zeroinitializer operand, so every result lane above lane 3 is false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked variant: signed greater-than compare of %__a against %__b, both
; viewed as <4 x i32>. The <4 x i1> result is ANDed with lanes 0-3 of %__u
; (viewed as <8 x i1>), widened to <32 x i1> and returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi634:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi635:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi636:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Mask indices 0-3 take the masked compare lanes; indices 4-7 take lanes of
; the zeroinitializer operand, so every result lane above lane 3 is false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked variant: signed greater-than compare of %__a against a <2 x i64>
; vector loaded from %__b, both viewed as <4 x i32>. The <4 x i1> result is
; ANDed with lanes 0-3 of %__u (viewed as <8 x i1>), widened to <32 x i1> and
; returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi637:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi638:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi639:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Mask indices 0-3 take the masked compare lanes; indices 4-7 take lanes of
; the zeroinitializer operand, so every result lane above lane 3 is false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of %__a (viewed as <4 x i32>) against a single
; i32 loaded from %__b and splatted across all four lanes. The <4 x i1> result
; is widened to <32 x i1> and returned as an i32 bitmask.
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi640:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi641:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi642:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Mask indices 0-3 take the compare lanes; indices 4-7 take lanes of the
; zeroinitializer operand, so every result lane above lane 3 is false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked variant: signed greater-than compare of %__a (viewed as <4 x i32>)
; against a single i32 loaded from %__b and splatted across all four lanes.
; The <4 x i1> result is ANDed with lanes 0-3 of %__u (viewed as <8 x i1>),
; widened to <32 x i1> and returned as an i32 bitmask.
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi643:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi644:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi645:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Mask indices 0-3 take the masked compare lanes; indices 4-7 take lanes of
; the zeroinitializer operand, so every result lane above lane 3 is false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of %__a against %__b, both viewed as <4 x i32>.
; The <4 x i1> result is widened to <64 x i1> and returned as an i64 bitmask.
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi646:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi647:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi648:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Mask indices 0-3 take the compare lanes; indices 4-7 take lanes of the
; zeroinitializer operand, so every result lane above lane 3 is false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Signed greater-than compare of %__a against a <2 x i64> vector loaded from
; %__b, both viewed as <4 x i32>. The <4 x i1> result is widened to <64 x i1>
; and returned as an i64 bitmask.
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi649:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi650:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi651:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
; Mask indices 0-3 take the compare lanes; indices 4-7 take lanes of the
; zeroinitializer operand, so every result lane above lane 3 is false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked variant: signed greater-than compare of %__a against %__b, both
; viewed as <4 x i32>. The <4 x i1> result is ANDed with lanes 0-3 of %__u
; (viewed as <8 x i1>), widened to <64 x i1> and returned as an i64 bitmask.
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi652:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi653:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi654:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Mask indices 0-3 take the masked compare lanes; indices 4-7 take lanes of
; the zeroinitializer operand, so every result lane above lane 3 is false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked variant: signed greater-than compare of %__a against a <2 x i64>
; vector loaded from %__b, both viewed as <4 x i32>. The <4 x i1> result is
; ANDed with lanes 0-3 of %__u (viewed as <8 x i1>), widened to <64 x i1> and
; returned as an i64 bitmask.
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi655:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi656:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi657:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sgt <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Mask indices 0-3 take the masked compare lanes; indices 4-7 take lanes of
; the zeroinitializer operand, so every result lane above lane 3 is false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of four i32 lanes: %__a against a single
; i32 loaded from %__b and splatted to all lanes. The result is widened to
; <64 x i1> with zero upper lanes and returned as an i64.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi658:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi659:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi660:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar across all four lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Lanes 0-3 come from %2; indices 4-7 select the zeroinitializer operand, so
; lanes 4-63 of the widened mask are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
|
|
|
|
; Masked signed greater-than compare of four i32 lanes: %__a against a single
; i32 loaded from %__b and splatted to all lanes. The result is ANDed with the
; low four bits of %__u, widened to <64 x i1> with zero upper lanes, and
; returned as an i64.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi661:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi662:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi663:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
; Splat the loaded scalar across all four lanes.
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
; Only the low four bits of %__u take part in the mask.
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
; Lanes 0-3 come from %4; indices 4-7 select the zeroinitializer operand, so
; lanes 4-63 of the widened mask are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of eight i32 lanes (%__a against %__b).
; The result is widened to <16 x i1> with zero upper lanes and returned as an
; i16.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Lanes 0-7 come from %2; indices 8-15 select the zeroinitializer operand, so
; lanes 8-15 of the widened mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
|
|
|
|
; Unmasked signed greater-than compare of eight i32 lanes: %__a against a
; vector loaded from %__b. The result is widened to <16 x i1> with zero upper
; lanes and returned as an i16.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Lanes 0-7 come from %2; indices 8-15 select the zeroinitializer operand, so
; lanes 8-15 of the widened mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
|
|
|
|
; Masked signed greater-than compare of eight i32 lanes (%__a against %__b).
; The result is ANDed with the eight bits of %__u, widened to <16 x i1> with
; zero upper lanes, and returned as an i16.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Lanes 0-7 come from %4; indices 8-15 select the zeroinitializer operand, so
; lanes 8-15 of the widened mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
|
|
|
|
; Masked signed greater-than compare of eight i32 lanes: %__a against a vector
; loaded from %__b. The result is ANDed with the eight bits of %__u, widened to
; <16 x i1> with zero upper lanes, and returned as an i16.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Lanes 0-7 come from %4; indices 8-15 select the zeroinitializer operand, so
; lanes 8-15 of the widened mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of eight i32 lanes: %__a against a
; single i32 loaded from %__b and splatted to all lanes. The result is widened
; to <16 x i1> with zero upper lanes and returned as an i16.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar across all eight lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Lanes 0-7 come from %2; indices 8-15 select the zeroinitializer operand, so
; lanes 8-15 of the widened mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
|
|
|
|
; Masked signed greater-than compare of eight i32 lanes: %__a against a single
; i32 loaded from %__b and splatted to all lanes. The result is ANDed with the
; eight bits of %__u, widened to <16 x i1> with zero upper lanes, and returned
; as an i16.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar across all eight lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
; Lanes 0-7 come from %4; indices 8-15 select the zeroinitializer operand, so
; lanes 8-15 of the widened mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of eight i32 lanes (%__a against %__b).
; The result is widened to <32 x i1> with zero upper lanes and returned as an
; i32.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi664:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi665:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi666:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Lanes 0-7 come from %2; indices 8-15 select the zeroinitializer operand, so
; lanes 8-31 of the widened mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
|
|
|
|
; Unmasked signed greater-than compare of eight i32 lanes: %__a against a
; vector loaded from %__b. The result is widened to <32 x i1> with zero upper
; lanes and returned as an i32.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi667:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi668:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi669:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
; Lanes 0-7 come from %2; indices 8-15 select the zeroinitializer operand, so
; lanes 8-31 of the widened mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
|
|
|
|
; Masked signed greater-than compare of eight i32 lanes (%__a against %__b).
; The result is ANDed with the eight bits of %__u, widened to <32 x i1> with
; zero upper lanes, and returned as an i32.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi670:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi671:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi672:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Lanes 0-7 come from %4; indices 8-15 select the zeroinitializer operand, so
; lanes 8-31 of the widened mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
|
|
|
|
; Masked signed greater-than compare of eight i32 lanes: %__a against a vector
; loaded from %__b. The result is ANDed with the eight bits of %__u, widened to
; <32 x i1> with zero upper lanes, and returned as an i32.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi673:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi674:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi675:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
; Lanes 0-7 come from %4; indices 8-15 select the zeroinitializer operand, so
; lanes 8-31 of the widened mask are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of eight i32 lanes: %__a against a
; single i32 loaded from %__b and splatted to all lanes. The result is widened
; to <32 x i1> with zero upper lanes and returned as an i32.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi676:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi677:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi678:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
; Splat the loaded scalar across all eight lanes.
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
; Lanes 0-7 come from %2; indices 8-15 select the zeroinitializer operand, so
; lanes 8-31 of the widened mask are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
|
|
|
|
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; scalar loaded from %__b and splatted to every lane. The 8-lane result is
; ANDed with the bits of %__u, widened to 32 lanes with zeros and returned as
; an i32 bitmask.
; Expected code with AVX512VL enabled: a single masked ymm vpcmpgtd using a
; {1to8} broadcast memory operand, followed by kmovd.
; Expected code with AVX512F only: vpbroadcastd plus a masked zmm compare, then
; the mask bits are pulled out one at a time (kshiftlw/kshiftrw/kmovw), rebuilt
; with vpinsrb and stored through a stack slot.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi679:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi680:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi681:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i32> %0, %1
|
|
; Apply the caller-supplied mask %__u, one bit per lane.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Widen to 32 lanes: indices 0-7 select the masked compare lanes, indices 8-15
; select lanes of the zeroinitializer operand, so result lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of the eight i32 lanes of %__a and %__b. The
; 8-lane result is widened to 64 lanes with zeros and returned as an i64
; bitmask.
; Expected code with AVX512VL enabled: one ymm vpcmpgtd into %k0, then kmovq.
; Expected code with AVX512F only: the compare is done on zmm registers, the
; mask bits are pulled out one at a time (kshiftlw/kshiftrw/kmovw), rebuilt
; with vpinsrb, and the i64 is assembled from two 32-bit stack loads.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi682:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi683:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi684:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp sgt <8 x i32> %0, %1
|
|
; Widen to 64 lanes: indices 0-7 select the compare lanes, indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Signed greater-than compare of the eight i32 lanes of %__a against a 256-bit
; vector loaded from %__b. The 8-lane result is widened to 64 lanes with zeros
; and returned as an i64 bitmask.
; Expected code with AVX512VL enabled: the load is folded into a ymm vpcmpgtd
; memory operand, then kmovq.
; Expected code with AVX512F only: vmovdqa load, compare on zmm registers, mask
; bits pulled out one at a time (kshiftlw/kshiftrw/kmovw), rebuilt with
; vpinsrb, and the i64 assembled from two 32-bit stack loads.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi685:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi686:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi687:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp sgt <8 x i32> %0, %1
|
|
; Widen to 64 lanes: indices 0-7 select the compare lanes, indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of the eight i32 lanes of %__a and %__b.
; The 8-lane result is ANDed with the bits of %__u, widened to 64 lanes with
; zeros and returned as an i64 bitmask.
; Expected code with AVX512VL enabled: kmovd of the mask into %k1, one masked
; ymm vpcmpgtd, then kmovq.
; Expected code with AVX512F only: masked compare on zmm registers, mask bits
; pulled out one at a time (kshiftlw/kshiftrw/kmovw), rebuilt with vpinsrb, and
; the i64 assembled from two 32-bit stack loads.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi688:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi689:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi690:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp sgt <8 x i32> %0, %1
|
|
; Apply the caller-supplied mask %__u, one bit per lane.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Widen to 64 lanes: indices 0-7 select the masked compare lanes, indices 8-15
; select lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; 256-bit vector loaded from %__b. The 8-lane result is ANDed with the bits of
; %__u, widened to 64 lanes with zeros and returned as an i64 bitmask.
; Expected code with AVX512VL enabled: kmovd of the mask into %k1, one masked
; ymm vpcmpgtd with the load folded as a memory operand, then kmovq.
; Expected code with AVX512F only: vmovdqa load, masked compare on zmm
; registers, mask bits pulled out one at a time (kshiftlw/kshiftrw/kmovw),
; rebuilt with vpinsrb, and the i64 assembled from two 32-bit stack loads.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi691:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi692:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi693:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp sgt <8 x i32> %0, %1
|
|
; Apply the caller-supplied mask %__u, one bit per lane.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Widen to 64 lanes: indices 0-7 select the masked compare lanes, indices 8-15
; select lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of the eight i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The 8-lane result is widened to
; 64 lanes with zeros and returned as an i64 bitmask.
; Expected code with AVX512VL enabled: one ymm vpcmpgtd using a {1to8}
; broadcast memory operand, then kmovq.
; Expected code with AVX512F only: vpbroadcastd, compare on zmm registers, mask
; bits pulled out one at a time (kshiftlw/kshiftrw/kmovw), rebuilt with
; vpinsrb, and the i64 assembled from two 32-bit stack loads.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi694:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi695:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi696:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i32> %0, %1
|
|
; Widen to 64 lanes: indices 0-7 select the compare lanes, indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of the eight i32 lanes of %__a against a
; scalar loaded from %__b and splatted to every lane. The 8-lane result is
; ANDed with the bits of %__u, widened to 64 lanes with zeros and returned as
; an i64 bitmask.
; Expected code with AVX512VL enabled: kmovd of the mask into %k1, one masked
; ymm vpcmpgtd using a {1to8} broadcast memory operand, then kmovq.
; Expected code with AVX512F only: vpbroadcastd, masked compare on zmm
; registers, mask bits pulled out one at a time (kshiftlw/kshiftrw/kmovw),
; rebuilt with vpinsrb, and the i64 assembled from two 32-bit stack loads.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi697:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi698:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi699:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle with an all-zero index mask.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i32> %0, %1
|
|
; Apply the caller-supplied mask %__u, one bit per lane.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Widen to 64 lanes: indices 0-7 select the masked compare lanes, indices 8-15
; select lanes of the zeroinitializer operand, so result lanes 8-63 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of the sixteen i32 lanes of %__a and %__b. The
; 16-lane result is widened to 32 lanes with zeros and returned as an i32
; bitmask.
; Expected code with AVX512BW/VL/DQ enabled: one zmm vpcmpgtd into %k0, then
; kmovd.
; Expected code with AVX512F only: the same zmm compare, after which all 16
; mask bits are pulled out one at a time (kshiftlw/kshiftrw/kmovw) into GPRs,
; which is why the callee-saved registers rbx and r12-r15 are pushed, then
; rebuilt with vmovd/vpinsrb and stored through a stack slot.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi700:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi701:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi702:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi703:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi704:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi705:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi706:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi707:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x i32>
|
|
%1 = bitcast <8 x i64> %__b to <16 x i32>
|
|
%2 = icmp sgt <16 x i32> %0, %1
|
|
; Widen to 32 lanes: indices 0-15 select the compare lanes, indices 16-31
; select lanes of the zeroinitializer operand, so result lanes 16-31 are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a against
; a <8 x i64> value loaded from %__b (both viewed as <16 x i32>).  The
; <16 x i1> result is concatenated with a zero vector to form <32 x i1> and
; returned as an i32 bitmask, so only the lanes coming from the compare can be
; set.  With AVX512VL/BW/DQ available the expected code is one compare into a
; mask register plus a mask-to-GPR move; with AVX512F only, the expected code
; extracts the mask bits one at a time and rebuilds the value through the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with the update script instead of editing them by hand.
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi708:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi709:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi710:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi711:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi712:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi713:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi714:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi715:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; those of %__b.  The <16 x i1> compare result is ANDed with %__u (bitcast
; from i16 to <16 x i1>), concatenated with a zero vector to form <32 x i1>,
; and returned as an i32 bitmask.  With AVX512VL/BW/DQ available the expected
; code is a single write-masked compare plus a mask-to-GPR move; with AVX512F
; only, the expected code extracts the mask bits one at a time and rebuilds the
; value through the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with the update script instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi716:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi717:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi718:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi719:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi720:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi721:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi722:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi723:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; a <8 x i64> value loaded from %__b (both viewed as <16 x i32>).  The
; <16 x i1> compare result is ANDed with %__u (bitcast from i16 to <16 x i1>),
; concatenated with a zero vector to form <32 x i1>, and returned as an i32
; bitmask.  With AVX512VL/BW/DQ available the expected code is a single
; write-masked compare with a folded memory operand plus a mask-to-GPR move;
; with AVX512F only, the expected code extracts the mask bits one at a time and
; rebuilds the value through the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with the update script instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi724:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi725:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi726:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi727:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi728:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi729:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi730:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi731:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a against
; a single i32 loaded from %__b and splatted to all sixteen lanes (insertelement
; into lane 0 followed by an all-zero-index shuffle).  The <16 x i1> result is
; concatenated with a zero vector to form <32 x i1> and returned as an i32
; bitmask.  Both configurations are expected to fold the splat into the
; compare as a {1to16} broadcast memory operand; with AVX512F only, the mask is
; then extracted one bit at a time and rebuilt through the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with the update script instead of editing them by hand.
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi732:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi733:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi734:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi735:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi736:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi737:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi738:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi739:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed greater-than compare of the sixteen i32 lanes of %__a against
; a single i32 loaded from %__b and splatted to all sixteen lanes.  The
; <16 x i1> compare result is ANDed with %__u (bitcast from i16 to <16 x i1>;
; note the operand order here is mask-first), concatenated with a zero vector
; to form <32 x i1>, and returned as an i32 bitmask.  Both configurations are
; expected to use a write-masked compare with a {1to16} broadcast memory
; operand; with AVX512F only, the mask is then extracted one bit at a time and
; rebuilt through the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with the update script instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi740:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi741:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi742:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi743:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi744:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi745:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi746:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi747:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %3, %2
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of the sixteen i32 lanes of %__a against
; those of %__b, widened to a 64-bit mask.  The shuffle takes indices 0-15 from
; the compare result; every remaining index (16-31, repeated three times)
; selects a lane of the zeroinitializer operand, so the 48 upper lanes of the
; <64 x i1> value are zero before it is bitcast to i64.  With AVX512VL/BW/DQ
; available the expected code is one compare into a mask register plus a
; 64-bit mask-to-GPR move; with AVX512F only, the expected code extracts the
; mask bits one at a time, rebuilds the value through the stack and combines
; two 32-bit halves with a shift and an OR.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with the update script instead of editing them by hand.
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi748:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi749:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi750:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi751:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi752:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi753:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi754:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi755:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; test_vpcmpsgtd_v16i1_v64i1_mask_mem
;
; Signed greater-than compare of sixteen i32 lanes where the second operand is
; loaded from memory; the 16-lane result is widened to 64 lanes and returned
; as an i64.
;   %__a  first operand, reinterpreted as <16 x i32>
;   %__b  pointer to the second operand (<8 x i64>, reinterpreted the same way)
;   ret   compare result in lanes 0-15, lanes 16-63 taken from zeroinitializer
;
; Expected code: under the VLX prefix a single vpcmpgtd with the load folded in,
; followed by kmovq. Under the NoVLX prefix the mask bits are extracted one at
; a time with kshiftlw/kshiftrw, packed with vpinsrb, turned back into a mask
; by vptestmd and assembled into the 64-bit result through stack slots.
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi756:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi757:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi758:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi759:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi760:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi761:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi762:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi763:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sgt <16 x i32> %0, %1
  ; Mask indices 0-15 select the compare lanes; every index >= 16 selects a
  ; lane of the zeroinitializer operand, so the upper 48 result lanes are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; test_masked_vpcmpsgtd_v16i1_v64i1_mask
;
; Masked signed greater-than compare of sixteen i32 lanes with both operands in
; registers; the 16-lane result is widened to 64 lanes and returned as an i64.
;   %__u  16-bit write mask, ANDed lane-wise with the compare result
;   %__a  first operand, reinterpreted as <16 x i32>
;   %__b  second operand, reinterpreted as <16 x i32>
;   ret   masked result in lanes 0-15, lanes 16-63 taken from zeroinitializer
;
; Expected code: under the VLX prefix the mask is moved into %k1 and applied
; directly on vpcmpgtd, followed by kmovq. Under the NoVLX prefix the masked
; compare is followed by the per-bit kshift/vpinsrb/vptestmd sequence and the
; result is assembled through stack slots.
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi764:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi765:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi766:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi767:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi768:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi769:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi770:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi771:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Mask indices 0-15 select the masked compare lanes; every index >= 16
  ; selects a lane of the zeroinitializer operand, so the upper lanes are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem
;
; Masked signed greater-than compare of sixteen i32 lanes where the second
; operand is loaded from memory; the 16-lane result is widened to 64 lanes and
; returned as an i64.
;   %__u  16-bit write mask, ANDed lane-wise with the compare result
;   %__a  first operand, reinterpreted as <16 x i32>
;   %__b  pointer to the second operand (<8 x i64>, reinterpreted the same way)
;   ret   masked result in lanes 0-15, lanes 16-63 taken from zeroinitializer
;
; Expected code: under the VLX prefix a masked vpcmpgtd with the load folded
; in, followed by kmovq. Under the NoVLX prefix the masked compare is followed
; by the per-bit kshift/vpinsrb/vptestmd sequence and the result is assembled
; through stack slots.
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi772:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi773:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi774:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi775:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi776:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi777:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi778:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi779:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Mask indices 0-15 select the masked compare lanes; every index >= 16
  ; selects a lane of the zeroinitializer operand, so the upper lanes are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; test_vpcmpsgtd_v16i1_v64i1_mask_mem_b
;
; Signed greater-than compare of sixteen i32 lanes against a single i32 loaded
; from memory and splatted to all lanes; the 16-lane result is widened to 64
; lanes and returned as an i64.
;   %__a  first operand, reinterpreted as <16 x i32>
;   %__b  pointer to the scalar that is broadcast as the second operand
;   ret   compare result in lanes 0-15, lanes 16-63 taken from zeroinitializer
;
; Expected code: both prefixes fold the splat into an embedded {1to16}
; broadcast on vpcmpgtd. The VLX prefix then needs only kmovq, while the NoVLX
; prefix goes through the per-bit kshift/vpinsrb/vptestmd sequence and stack
; slots to build the 64-bit result.
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi780:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi781:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi782:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi783:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi784:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi785:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi786:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi787:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <16 x i32> %0, %1
  ; Mask indices 0-15 select the compare lanes; every index >= 16 selects a
  ; lane of the zeroinitializer operand, so the upper 48 result lanes are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b
;
; Masked signed greater-than compare of sixteen i32 lanes against a single i32
; loaded from memory and splatted to all lanes; the 16-lane result is widened
; to 64 lanes and returned as an i64.
;   %__u  16-bit write mask, ANDed lane-wise with the compare result
;   %__a  first operand, reinterpreted as <16 x i32>
;   %__b  pointer to the scalar that is broadcast as the second operand
;   ret   masked result in lanes 0-15, lanes 16-63 taken from zeroinitializer
;
; Expected code: both prefixes fold the splat into an embedded {1to16}
; broadcast on a masked vpcmpgtd. The VLX prefix then needs only kmovq, while
; the NoVLX prefix goes through the per-bit kshift/vpinsrb/vptestmd sequence
; and stack slots to build the 64-bit result.
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi788:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi789:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi790:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi791:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi792:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi793:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi794:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi795:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %3, %2
  ; Mask indices 0-15 select the masked compare lanes; every index >= 16
  ; selects a lane of the zeroinitializer operand, so the upper lanes are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; test_vpcmpsgtq_v2i1_v4i1_mask
;
; Signed greater-than compare of two i64 lanes with both operands in
; registers; the 2-lane result is widened to 4 lanes and returned as an i4.
;   %__a  first operand
;   %__b  second operand
;   ret   compare result in lanes 0-1, lanes 2-3 taken from zeroinitializer
;
; Expected code: under the VLX prefix vpcmpgtq writes a mask register that is
; spilled with kmovb and reloaded with movzbl. Under the NoVLX prefix the
; compare produces a vector in %xmm0 that is converted to a mask with
; vinsertps/vpslld/vptestmd before the same store-and-reload.
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Both bitcasts are identity casts (source and destination type are equal).
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 0-1 select the compare lanes, indices 2-3 select zeroinitializer.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = bitcast <4 x i1> %3 to i4
  ret i4 %4
}
|
|
|
|
; test_vpcmpsgtq_v2i1_v4i1_mask_mem
;
; Signed greater-than compare of two i64 lanes where the second operand is
; loaded from memory; the 2-lane result is widened to 4 lanes and returned as
; an i4.
;   %__a  first operand
;   %__b  pointer to the second operand
;   ret   compare result in lanes 0-1, lanes 2-3 taken from zeroinitializer
;
; Expected code: both prefixes fold the load into vpcmpgtq. The VLX prefix
; writes a mask register directly; the NoVLX prefix compares into %xmm0 and
; converts to a mask with vinsertps/vpslld/vptestmd. Both then store the byte
; to the stack and reload it with movzbl.
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Both bitcasts are identity casts (source and destination type are equal).
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  ; Indices 0-1 select the compare lanes, indices 2-3 select zeroinitializer.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = bitcast <4 x i1> %3 to i4
  ret i4 %4
}
|
|
|
|
; test_masked_vpcmpsgtq_v2i1_v4i1_mask
;
; Masked signed greater-than compare of two i64 lanes with both operands in
; registers; the 2-lane result is widened to 4 lanes and returned as an i4.
;   %__u  8-bit mask; only its lanes 0 and 1 are ANDed with the compare result
;   %__a  first operand
;   %__b  second operand
;   ret   masked result in lanes 0-1, lanes 2-3 taken from zeroinitializer
;
; Expected code: under the VLX prefix the mask is moved into %k1 and applied
; directly on vpcmpgtq. Under the NoVLX prefix the two mask bits are extracted
; with kshiftlw/kshiftrw, rebuilt as a vector with vmovd/vpinsrb, ANDed with
; the vector compare result and converted back with vptestmd.
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Both bitcasts are identity casts (source and destination type are equal).
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only the two low mask lanes so the mask matches the compare width.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  ; Indices 0-1 select the masked lanes, indices 2-3 select zeroinitializer.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}
|
|
|
|
; test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem
;
; Masked signed greater-than compare of two i64 lanes where the second operand
; is loaded from memory; the 2-lane result is widened to 4 lanes and returned
; as an i4.
;   %__u  8-bit mask; only its lanes 0 and 1 are ANDed with the compare result
;   %__a  first operand
;   %__b  pointer to the second operand
;   ret   masked result in lanes 0-1, lanes 2-3 taken from zeroinitializer
;
; Expected code: both prefixes fold the load into vpcmpgtq. The VLX prefix
; applies the mask through %k1 on the compare itself; the NoVLX prefix extracts
; the two mask bits with kshiftlw/kshiftrw, rebuilds them as a vector with
; vmovd/vpinsrb, ANDs with the vector compare result and converts back with
; vptestmd.
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Both bitcasts are identity casts (source and destination type are equal).
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep only the two low mask lanes so the mask matches the compare width.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  ; Indices 0-1 select the masked lanes, indices 2-3 select zeroinitializer.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}
|
|
|
|
|
|
; Unmasked signed "greater than" compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is widened to <4 x i1> with two false lanes taken from
; the zeroinitializer operand and returned as an i4.
; The expected code uses an embedded {1to2} broadcast memory operand when
; AVX512VL is enabled and a separate vpbroadcastq otherwise.
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked broadcast compare: %__a is compared (signed >) against the i64 at
; %__b splatted to both lanes, and the <2 x i1> result is ANDed with elements
; 0 and 1 of %__u reinterpreted as <8 x i1>. In this function the mask is the
; FIRST operand of the `and`; the expected vpand operand order in the
; non-AVX512VL output matches that ordering, so do not swap the operands
; without regenerating the assertions.
; The result is widened to <4 x i1> with two false lanes and returned as i4.
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed "greater than" compare of two <2 x i64> register arguments.
; The <2 x i1> result is widened to <8 x i1>: lanes 0 and 1 carry the compare
; result and lanes 2-7 use shuffle indices 2 or 3, which select from the
; zeroinitializer operand and are therefore false. The vector is returned as
; an i8. The same-type bitcasts on %__a and %__b are no-ops.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unmasked signed "greater than" compare of %__a against the <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <8 x i1> (lanes 2-7
; select from the zeroinitializer operand, so they are false) and returned as
; an i8. In both expected outputs the load is folded into vpcmpgtq as a memory
; operand. The same-type bitcasts are no-ops.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked signed "greater than" compare of two <2 x i64> register arguments.
; The <2 x i1> compare result is ANDed with elements 0 and 1 of %__u
; reinterpreted as <8 x i1>, widened to <8 x i1> with lanes 2-7 false (they
; select from the zeroinitializer operand), and returned as an i8.
; With AVX512VL the expected code applies %__u directly as the {%k1} write
; mask of vpcmpgtq; without it the two mask bits are extracted with k-register
; shifts and combined via vpand.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked signed "greater than" compare of %__a against the <2 x i64> vector
; loaded from %__b. The <2 x i1> compare result is ANDed with elements 0 and 1
; of %__u reinterpreted as <8 x i1>, widened to <8 x i1> with lanes 2-7 false
; (they select from the zeroinitializer operand), and returned as an i8.
; The same-type bitcasts on %__a and %load are no-ops.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed "greater than" compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is widened to <8 x i1> with lanes 2-7 false (they select
; from the zeroinitializer operand) and returned as an i8.
; The expected code uses an embedded {1to2} broadcast memory operand when
; AVX512VL is enabled and a separate vpbroadcastq otherwise.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked broadcast compare: %__a is compared (signed >) against the i64 at
; %__b splatted to both lanes, and the <2 x i1> result is ANDed with elements
; 0 and 1 of %__u reinterpreted as <8 x i1>. In this function the mask is the
; FIRST operand of the `and`; the expected vpand operand order in the
; non-AVX512VL output matches that ordering, so do not swap the operands
; without regenerating the assertions.
; The result is widened to <8 x i1> with lanes 2-7 false and returned as i8.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed "greater than" compare of two <2 x i64> register arguments.
; The <2 x i1> result is widened to <16 x i1>: lanes 0 and 1 carry the compare
; result and lanes 2-15 use shuffle indices 2 or 3, which select from the
; zeroinitializer operand and are therefore false. The vector is returned as
; an i16. The same-type bitcasts on %__a and %__b are no-ops.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked signed "greater than" compare of %__a against the <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <16 x i1> (lanes 2-15
; select from the zeroinitializer operand, so they are false) and returned as
; an i16. In both expected outputs the load is folded into vpcmpgtq as a
; memory operand. The same-type bitcasts are no-ops.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed "greater than" compare of two <2 x i64> register arguments.
; The <2 x i1> compare result is ANDed with elements 0 and 1 of %__u
; reinterpreted as <8 x i1>, widened to <16 x i1> with lanes 2-15 false (they
; select from the zeroinitializer operand), and returned as an i16.
; With AVX512VL the expected code applies %__u directly as the {%k1} write
; mask of vpcmpgtq; without it the two mask bits are extracted with k-register
; shifts and combined via vpand.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked signed "greater than" compare of %__a against the <2 x i64> vector
; loaded from %__b. The <2 x i1> compare result is ANDed with elements 0 and 1
; of %__u reinterpreted as <8 x i1>, widened to <16 x i1> with lanes 2-15
; false (they select from the zeroinitializer operand), and returned as an
; i16. The same-type bitcasts on %__a and %load are no-ops.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed "greater than" compare of %__a against a scalar i64 loaded
; from %__b and splatted to both lanes (insertelement + shufflevector <0, 0>).
; The <2 x i1> result is widened to <16 x i1> with lanes 2-15 false (they
; select from the zeroinitializer operand) and returned as an i16.
; The expected code uses an embedded {1to2} broadcast memory operand when
; AVX512VL is enabled and a separate vpbroadcastq otherwise.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked broadcast compare: %__a is compared (signed >) against the i64 at
; %__b splatted to both lanes, and the <2 x i1> result is ANDed with elements
; 0 and 1 of %__u reinterpreted as <8 x i1>. In this function the mask is the
; FIRST operand of the `and`; the expected vpand operand order in the
; non-AVX512VL output matches that ordering, so do not swap the operands
; without regenerating the assertions.
; The result is widened to <16 x i1> with lanes 2-15 false and returned as i16.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed "greater than" compare of two <2 x i64> register arguments.
; The <2 x i1> result is widened to <32 x i1>: lanes 0 and 1 carry the compare
; result and lanes 2-31 use shuffle indices 2 or 3, which select from the
; zeroinitializer operand and are therefore false. The vector is returned as
; an i32. The same-type bitcasts on %__a and %__b are no-ops.
; Without AVX512VL the expected code sets up an %rbp frame with a 32-byte
; aligned stack, stores two 16-bit masks with kmovw, and reloads them as one
; 32-bit value with movl.
; NOTE(review): the .LcfiN label numbers in the expected output appear to be
; numbered across the whole file, so adding or removing earlier functions
; presumably requires regenerating these assertions - confirm.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi796:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi797:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi798:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked signed "greater than" compare of %__a against the <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <32 x i1> (lanes 2-31
; select from the zeroinitializer operand, so they are false) and returned as
; an i32. The same-type bitcasts on %__a and %load are no-ops.
; Without AVX512VL the expected code sets up an %rbp frame with a 32-byte
; aligned stack, stores two 16-bit masks with kmovw, and reloads them as one
; 32-bit value with movl.
; NOTE(review): the .LcfiN label numbers in the expected output appear to be
; numbered across the whole file, so adding or removing earlier functions
; presumably requires regenerating these assertions - confirm.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi799:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi800:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi801:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of two <2 x i64> register operands.
; The compare result is ANDed with the low two bits of %__u, widened to 32
; lanes (every added lane is zero) and returned as an i32 bitmask. The
; assertion lines below are autogenerated by utils/update_llc_test_checks.py.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi802:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi803:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi804:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight lane bits and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Mask indices 0-1 select the masked compare lanes; indices 2-3 select lanes
; of the zeroinitializer operand, so result lanes 2..31 are always false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of %__a against a <2 x i64> loaded from
; %__b. The compare result is ANDed with the low two bits of %__u, widened to
; 32 lanes (every added lane is zero) and returned as an i32 bitmask. The
; assertion lines below are autogenerated by utils/update_llc_test_checks.py.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi805:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi806:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi807:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
; The right-hand compare operand comes from memory.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight lane bits and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Mask indices 0-1 select the masked compare lanes; indices 2-3 select lanes
; of the zeroinitializer operand, so result lanes 2..31 are always false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to both lanes. The 2-lane i1 result is widened to 32 lanes
; (every added lane is zero) and returned as an i32 bitmask. The assertion
; lines below are autogenerated by utils/update_llc_test_checks.py.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi808:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi809:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi810:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then copy lane 0 to both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
; Mask indices 0-1 select the compare lanes; indices 2-3 select lanes of the
; zeroinitializer operand, so result lanes 2..31 are always false.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to both lanes. The low two bits of %__u are ANDed with the
; compare result, which is then widened to 32 lanes (every added lane is zero)
; and returned as an i32 bitmask. The assertion lines below are autogenerated
; by utils/update_llc_test_checks.py.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi811:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi812:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi813:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then copy lane 0 to both lanes.
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight lane bits and keep only lanes 0 and 1.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
; Note: the mask is the first 'and' operand here.
%4 = and <2 x i1> %extract.i, %2
|
|
; Mask indices 0-1 select the masked compare lanes; indices 2-3 select lanes
; of the zeroinitializer operand, so result lanes 2..31 are always false.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi814:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi815:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi816:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi817:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi818:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi819:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi820:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi821:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi822:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi823:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi824:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi825:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi826:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi827:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi828:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi829:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi830:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi831:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sgt <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of two <4 x i64> register operands. The 4-lane
; i1 result is widened to 8 lanes (lanes 4..7 are zero) and returned as an i8
; bitmask. The assertion lines below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp sgt <4 x i64> %0, %1
|
|
; Mask indices 0-3 select the compare lanes; indices 4-7 select the four
; lanes of the zeroinitializer operand, so result lanes 4..7 are always false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Signed greater-than compare of %__a against a <4 x i64> loaded from %__b.
; The 4-lane i1 result is widened to 8 lanes (lanes 4..7 are zero) and
; returned as an i8 bitmask. The assertion lines below are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
; The right-hand compare operand comes from memory.
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp sgt <4 x i64> %0, %1
|
|
; Mask indices 0-3 select the compare lanes; indices 4-7 select the four
; lanes of the zeroinitializer operand, so result lanes 4..7 are always false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of two <4 x i64> register operands.
; The low four bits of %__u (extracted as lanes 0-3 of its <8 x i1> view) are
; ANDed with the compare result; the <4 x i1> value is then widened to
; <8 x i1> with zero upper lanes and returned as an i8 bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq predicated on %k1; the
; AVX512F-only run expects the mask bits to be unpacked into an xmm register,
; combined with vpand, and the result mask rebuilt lane by lane.
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
; Masked signed greater-than compare of %__a against a <4 x i64> loaded from
; %__b. The low four bits of %__u are ANDed with the compare result; the
; <4 x i1> value is widened to <8 x i1> with zero upper lanes and returned as
; an i8 bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq with a memory operand,
; predicated on %k1; the AVX512F-only run expects the mask bits to be unpacked
; into an xmm register, combined with vpand, and the mask rebuilt lane by lane.
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask).
; The <4 x i1> result is widened to <8 x i1> with zero upper lanes and
; returned as an i8 bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq with a {1to4} broadcast memory
; operand; the AVX512F-only run expects an explicit vpbroadcastq, the compare
; in a ymm register, and a lane-by-lane rebuild of the mask.
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The low four bits of %__u are ANDed
; with the compare result (mask is the first `and` operand here); the
; <4 x i1> value is widened to <8 x i1> with zero upper lanes and returned as
; an i8 bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq with a {1to4} broadcast memory
; operand predicated on %k1; the AVX512F-only run expects vpbroadcastq, the
; mask bits unpacked into an xmm register, vpand, and a lane-by-lane rebuild.
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Signed greater-than compare of two <4 x i64> register operands. The
; <4 x i1> result is widened to <16 x i1>: lanes 0-3 carry the compare bits
; and every remaining lane selects an element of the zero vector. The value
; is returned as an i16 bitmask.
; The AVX512VL-enabled run expects a single vpcmpgtq writing a mask register;
; the AVX512F-only run expects the compare in a ymm register followed by a
; lane-by-lane rebuild of the mask using dword-granular operations.
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Signed greater-than compare of %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is widened to <16 x i1>: lanes 0-3 carry the compare
; bits and every remaining lane selects an element of the zero vector. The
; value is returned as an i16 bitmask.
; The AVX512VL-enabled run expects a single vpcmpgtq with a memory operand
; writing a mask register; the AVX512F-only run expects the compare in a ymm
; register followed by a lane-by-lane rebuild of the mask.
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked signed greater-than compare of two <4 x i64> register operands.
; The low four bits of %__u are ANDed with the compare result; the <4 x i1>
; value is widened to <16 x i1> (lanes 0-3 carry the result, all remaining
; lanes select zero-vector elements) and returned as an i16 bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq predicated on %k1; the
; AVX512F-only run expects the mask bits to be unpacked into an xmm register,
; combined with vpand, and the result mask rebuilt lane by lane.
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked signed greater-than compare of %__a against a <4 x i64> loaded from
; %__b. The low four bits of %__u are ANDed with the compare result; the
; <4 x i1> value is widened to <16 x i1> (lanes 0-3 carry the result, all
; remaining lanes select zero-vector elements) and returned as an i16 bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq with a memory operand,
; predicated on %k1; the AVX512F-only run expects the mask bits to be unpacked
; into an xmm register, combined with vpand, and the mask rebuilt lane by lane.
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Signed greater-than compare of %__a against a single i64 loaded from %__b
; and splatted to all four lanes (insertelement + all-zero shuffle mask).
; The <4 x i1> result is widened to <16 x i1> (lanes 0-3 carry the result,
; all remaining lanes select zero-vector elements) and returned as an i16
; bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq with a {1to4} broadcast memory
; operand; the AVX512F-only run expects an explicit vpbroadcastq, the compare
; in a ymm register, and a lane-by-lane rebuild of the mask.
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked signed greater-than compare of %__a against a single i64 loaded from
; %__b and splatted to all four lanes. The low four bits of %__u are ANDed
; with the compare result (mask is the first `and` operand here); the
; <4 x i1> value is widened to <16 x i1> (lanes 0-3 carry the result, all
; remaining lanes select zero-vector elements) and returned as an i16 bitmask.
; The AVX512VL-enabled run expects one vpcmpgtq with a {1to4} broadcast memory
; operand predicated on %k1; the AVX512F-only run expects vpbroadcastq, the
; mask bits unpacked into an xmm register, vpand, and a lane-by-lane rebuild.
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Signed greater-than compare of two <4 x i64> register operands. The <4 x i1>
; result is widened to <32 x i1> and returned as an i32 bitmask.
; Expected code when AVX512VL is enabled: one vpcmpgtq writing %k0, then kmovd.
; Expected code for the AVX512F-only run: the compare is done in %ymm0 and the
; mask is rebuilt with vptestmd and two 16-bit stores to a 32-byte aligned
; stack area that is then read back as one 32-bit value.
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi832:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi833:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi834:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> and returned as i32.
; In both runs the load is expected to fold into vpcmpgtq as a memory operand:
; with AVX512VL the destination is %k0; in the AVX512F-only run it is %ymm0
; and the mask is rebuilt with vptestmd and 16-bit stores to the stack.
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi835:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi836:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi837:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low 4 bits of %__u, widened to <32 x i1>
; and returned as i32.
; Expected code when AVX512VL is enabled: %__u is moved to %k1 and used as the
; write-mask of vpcmpgtq.
; Expected code for the AVX512F-only run: each of the 4 mask bits is isolated
; with a kshiftlw/kshiftrw pair, inserted into %xmm1 with vmovd/vpinsrb, and
; combined with the vector compare result by vpand before the mask is rebuilt
; through the stack.
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi838:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi839:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi840:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-bit mask take part in the compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked signed greater-than compare of %__a against a <4 x i64> vector loaded
; from %__b. The compare result is ANDed with the low 4 bits of %__u, widened
; to <32 x i1> and returned as i32.
; Expected code when AVX512VL is enabled: %__u is moved to %k1 and used as the
; write-mask of a vpcmpgtq with a folded memory operand.
; Expected code for the AVX512F-only run: the load is folded into a ymm
; vpcmpgtq, the 4 mask bits are isolated with kshiftlw/kshiftrw pairs,
; inserted into %xmm1 with vmovd/vpinsrb, and applied with vpand before the
; mask is rebuilt through the stack.
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi841:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi842:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi843:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-bit mask take part in the compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to all 4 lanes. The <4 x i1> result is widened to <32 x i1>
; and returned as i32.
; Expected code when AVX512VL is enabled: the splat is folded into vpcmpgtq
; as a {1to4} broadcast memory operand.
; Expected code for the AVX512F-only run: a separate vpbroadcastq feeds a
; register-register ymm vpcmpgtq, and the mask is rebuilt through the stack.
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi844:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi845:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi846:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed greater-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all 4 lanes. The compare result is ANDed with the low
; 4 bits of %__u, widened to <32 x i1> and returned as i32.
; Expected code when AVX512VL is enabled: %__u is moved to %k1 and used as the
; write-mask of a vpcmpgtq with a {1to4} broadcast memory operand.
; Expected code for the AVX512F-only run: vpbroadcastq plus a ymm vpcmpgtq,
; the 4 mask bits isolated with kshiftlw/kshiftrw pairs and inserted into
; %xmm1 with vmovd/vpinsrb, then vpand, then the mask rebuilt through the
; stack.
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi847:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi848:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi849:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-bit mask take part in the compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed greater-than compare of two <4 x i64> register operands. The <4 x i1>
; result is widened to <64 x i1> and returned as an i64 bitmask.
; Expected code when AVX512VL is enabled: one vpcmpgtq writing %k0, then kmovq.
; Expected code for the AVX512F-only run: the compare is done in %ymm0, a
; zeroed mask register is stored to two stack words, two more 16-bit words are
; filled from vptestmd results, and the i64 is assembled from two 32-bit stack
; loads with shlq/orq.
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi850:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi851:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi852:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Signed greater-than compare of %__a against a <4 x i64> vector loaded from
; %__b. The <4 x i1> result is widened to <64 x i1> and returned as i64.
; In both runs the load is expected to fold into vpcmpgtq as a memory operand:
; with AVX512VL the destination is %k0 and the result is read with kmovq; in
; the AVX512F-only run the destination is %ymm0 and the i64 is assembled from
; 16-bit mask stores and two 32-bit stack loads merged with shlq/orq.
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi853:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi854:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi855:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed greater-than compare of two <4 x i64> register operands. The
; compare result is ANDed with the low 4 bits of %__u, widened to <64 x i1>
; and returned as i64.
; Expected code when AVX512VL is enabled: %__u is moved to %k1 and used as the
; write-mask of vpcmpgtq; the result is read with kmovq.
; Expected code for the AVX512F-only run: the 4 mask bits are isolated with
; kshiftlw/kshiftrw pairs, inserted into %xmm1 with vmovd/vpinsrb and applied
; with vpand; the i64 is then assembled from 16-bit mask stores and two 32-bit
; stack loads merged with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi856:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi857:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi858:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-bit mask take part in the compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked signed greater-than compare of %__a against a <4 x i64> vector loaded
; from %__b. The compare result is ANDed with the low 4 bits of %__u, widened
; to <64 x i1> and returned as i64.
; Expected code when AVX512VL is enabled: %__u is moved to %k1 and used as the
; write-mask of a vpcmpgtq with a folded memory operand; the result is read
; with kmovq.
; Expected code for the AVX512F-only run: the load is folded into a ymm
; vpcmpgtq, the 4 mask bits are isolated with kshiftlw/kshiftrw pairs,
; inserted into %xmm1 with vmovd/vpinsrb and applied with vpand; the i64 is
; then assembled from 16-bit mask stores and two 32-bit stack loads merged
; with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi859:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi860:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi861:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sgt <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Only lanes 0-3 of the 8-bit mask take part in the compare.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Signed greater-than compare of %__a against a scalar i64 loaded from %__b
; and splatted to all 4 lanes. The <4 x i1> result is widened to <64 x i1>
; and returned as i64.
; Expected code when AVX512VL is enabled: the splat is folded into vpcmpgtq
; as a {1to4} broadcast memory operand and the result is read with kmovq.
; Expected code for the AVX512F-only run: a separate vpbroadcastq feeds a
; register-register ymm vpcmpgtq, and the i64 is assembled from 16-bit mask
; stores and two 32-bit stack loads merged with shlq/orq.
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi862:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi863:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi864:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sgt <4 x i64> %0, %1
  ; Shuffle indices >= 4 select lanes of the all-zero second operand, so only
  ; bits 0-3 of the returned mask can be set.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed greater-than compare of <4 x i64> %__a against a scalar that is
; loaded from %__b and splatted to all four lanes. Lanes 0-3 of %__u (viewed as
; <8 x i1>) are ANDed with the compare result, which is then widened to 64
; lanes with zeros and returned as an i64 bitmask.
; The AVX512VL configuration expects one masked vpcmpgtq with a {1to4}
; broadcast operand; the AVX512F-only configuration expects the mask to be
; assembled through stack slots.
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi865:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi866:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi867:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <4 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Shuffle indices 4-7 select lanes of the zeroinitializer operand, so result
; lanes 4-63 are zero and only lanes 0-3 carry the masked compare result.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to 16 lanes with zeros and returned as an i16 bitmask.
; Both configurations expect a single vpcmpgtq into a mask register followed
; by a kmov to %eax.
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Signed greater-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to 16 lanes with zeros and
; returned as an i16 bitmask.
; Both configurations expect the load to be folded into vpcmpgtq as a memory
; operand.
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of two <8 x i64> register operands. The
; compare result is ANDed with %__u (viewed as <8 x i1>), widened to 16 lanes
; with zeros and returned as an i16 bitmask.
; Both configurations expect %__u to be moved into %k1 and used as the write
; mask of a single vpcmpgtq.
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; (viewed as <8 x i1>), widened to 16 lanes with zeros and returned as an i16
; bitmask.
; Both configurations expect a single write-masked vpcmpgtq with the load
; folded in as a memory operand.
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to 16 lanes with zeros and returned as an i16 bitmask.
; Both configurations expect the load and splat to be folded into vpcmpgtq as a
; {1to8} broadcast memory operand.
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of <8 x i64> %__a against a scalar i64
; that is loaded from %__b and splatted to all eight lanes. The compare result
; is ANDed with %__u (viewed as <8 x i1>), widened to 16 lanes with zeros and
; returned as an i16 bitmask.
; Both configurations expect a single write-masked vpcmpgtq with a {1to8}
; broadcast memory operand.
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to 32 lanes with zeros and returned as an i32 bitmask.
; The AVX512VL configuration expects a single vpcmpgtq plus kmovd; the
; AVX512F-only configuration expects the mask bits to be extracted one by one,
; rebuilt in a vector register and read back through a stack slot.
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi868:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi869:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi870:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Signed greater-than compare of <8 x i64> %__a against a full <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to 32 lanes with zeros and
; returned as an i32 bitmask.
; The AVX512VL configuration expects a single vpcmpgtq with a memory operand
; plus kmovd; the AVX512F-only configuration expects the mask bits to be
; extracted one by one, rebuilt in a vector register and read back through a
; stack slot.
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi871:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi872:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi873:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of two <8 x i64> register operands. The
; compare result is ANDed with %__u (viewed as <8 x i1>), widened to 32 lanes
; with zeros and returned as an i32 bitmask.
; The AVX512VL configuration expects a single write-masked vpcmpgtq plus
; kmovd; the AVX512F-only configuration expects the mask bits to be extracted
; one by one, rebuilt in a vector register and read back through a stack slot.
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi874:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi875:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi876:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of <8 x i64> %__a against a full
; <8 x i64> vector loaded from %__b. The compare result is ANDed with %__u
; (viewed as <8 x i1>), widened to 32 lanes with zeros and returned as an i32
; bitmask.
; The AVX512VL configuration expects a single write-masked vpcmpgtq with a
; memory operand plus kmovd; the AVX512F-only configuration expects the mask
; bits to be extracted one by one, rebuilt in a vector register and read back
; through a stack slot.
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi877:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi878:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi879:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of <8 x i64> %__a against a scalar i64 that is
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to 32 lanes with zeros and returned as an i32 bitmask.
; The AVX512VL configuration expects a single vpcmpgtq with a {1to8} broadcast
; operand plus kmovd; the AVX512F-only configuration expects the mask bits to
; be extracted one by one, rebuilt in a vector register and read back through
; a stack slot.
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi880:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi881:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi882:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of <8 x i64> %__a against a scalar i64
; that is loaded from %__b and splatted to all eight lanes. The compare result
; is ANDed with %__u (viewed as <8 x i1>), widened to 32 lanes with zeros and
; returned as an i32 bitmask.
; The AVX512VL configuration expects a single write-masked vpcmpgtq with a
; {1to8} broadcast operand plus kmovd; the AVX512F-only configuration expects
; the mask bits to be extracted one by one, rebuilt in a vector register and
; read back through a stack slot.
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi883:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi884:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi885:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed greater-than compare of two <8 x i64> register operands. The <8 x i1>
; result is widened to 64 lanes with zeros and returned as an i64 bitmask.
; The AVX512VL configuration expects a single vpcmpgtq plus kmovq; the
; AVX512F-only configuration expects the mask bits to be extracted one by one,
; rebuilt in a vector register, and the 64-bit result combined from two 32-bit
; stack loads.
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi886:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi887:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi888:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select lanes of the zeroinitializer operand, so result
; lanes 8-63 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked signed greater-than compare of an <8 x i64> register operand against
; an <8 x i64> loaded through the pointer argument. The <8 x i1> result is
; widened to <64 x i1> (lanes 8 and up are taken from a zero vector) and
; returned as an i64 bitmask.
; Both check prefixes expect the load to be folded into vpcmpgtq as a (%rdi)
; memory operand. The VLX prefix then expects only kmovq; the NoVLX prefix
; expects the bit-by-bit mask extraction and a result assembled from two
; 32-bit stack loads.
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi889:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi890:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi891:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The bitcasts of %__a and %load are same-type casts and change nothing.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so
; result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of two <8 x i64> register operands. The
; i8 argument %__u is reinterpreted as eight mask bits and ANDed with the
; compare result; the <8 x i1> value is then widened to <64 x i1> (lanes 8 and
; up are taken from a zero vector) and returned as an i64 bitmask.
; Both check prefixes expect %edi to be moved into %k1 and used as the
; write-mask of vpcmpgtq. The VLX prefix then expects only kmovq; the NoVLX
; prefix expects the bit-by-bit mask extraction and a result assembled from
; two 32-bit stack loads.
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi892:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi893:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi894:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The two bitcasts below are same-type casts and leave the operands unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; View the i8 mask argument as <8 x i1> and AND it with the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so
; result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked signed greater-than compare of an <8 x i64> register operand against
; an <8 x i64> loaded through the pointer argument. The i8 argument %__u is
; reinterpreted as eight mask bits and ANDed with the compare result; the
; <8 x i1> value is then widened to <64 x i1> (lanes 8 and up are taken from a
; zero vector) and returned as an i64 bitmask.
; Both check prefixes expect %edi to be moved into %k1 and the load to be
; folded into a write-masked vpcmpgtq with a (%rsi) memory operand. The NoVLX
; prefix additionally expects the bit-by-bit mask extraction sequence.
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi895:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi896:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi897:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The bitcasts of %__a and %load are same-type casts and change nothing.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; View the i8 mask argument as <8 x i1> and AND it with the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so
; result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed greater-than compare of an <8 x i64> register operand against
; a single i64 loaded through the pointer argument and splatted to all eight
; lanes. The <8 x i1> result is widened to <64 x i1> (lanes 8 and up are taken
; from a zero vector) and returned as an i64 bitmask.
; Both check prefixes expect the load and splat to be folded into vpcmpgtq as
; a (%rdi){1to8} broadcast memory operand. The NoVLX prefix additionally
; expects the bit-by-bit mask extraction sequence.
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi898:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi899:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi900:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The bitcast of %__a is a same-type cast and changes nothing.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
; Load one i64 and splat it: insert into lane 0, then shuffle with an all-zero
; index mask so every lane reads lane 0.
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so
; result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed greater-than compare of an <8 x i64> register operand against
; a single i64 loaded through the pointer argument and splatted to all eight
; lanes. The i8 argument %__u is reinterpreted as eight mask bits and ANDed
; with the compare result; the <8 x i1> value is then widened to <64 x i1>
; (lanes 8 and up are taken from a zero vector) and returned as an i64 bitmask.
; Both check prefixes expect %edi to be moved into %k1 and the load and splat
; to be folded into a write-masked vpcmpgtq with a (%rsi){1to8} broadcast
; operand. The NoVLX prefix additionally expects the bit-by-bit mask
; extraction sequence.
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi901:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi902:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi903:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; The bitcast of %__a is a same-type cast and changes nothing.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
; Load one i64 and splat it: insert into lane 0, then shuffle with an all-zero
; index mask so every lane reads lane 0.
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sgt <8 x i64> %0, %1
|
|
; View the i8 mask argument as <8 x i1> and AND it with the compare result
; (here the mask is the first operand of the and).
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Shuffle indices 8-15 select elements of the zeroinitializer operand, so
; result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed greater-or-equal compare of two 128-bit register operands
; viewed as <16 x i8>. The <16 x i1> result is widened to <32 x i1> (lanes 16
; and up are taken from a zero vector) and returned as an i32 bitmask.
; With the avx512bw/vl/dq RUN line (VLX prefix) the expected code is vpcmpleb
; with the operands swapped (b <= a) followed by kmovd. With only avx512f
; (NoVLX prefix) the expected code computes vpcmpgtb of b over a, inverts it by
; xor with an all-ones vector, converts it to a 16-bit mask, and then extracts
; and re-inserts all sixteen bits through general-purpose registers, saving and
; restoring %rbx and %r12-%r15 around that sequence.
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi904:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi905:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi906:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi907:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi908:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi909:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi910:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi911:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret both 128-bit arguments as sixteen i8 lanes.
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
%2 = icmp sge <16 x i8> %0, %1
|
|
; Shuffle indices 16-31 select elements of the zeroinitializer operand, so
; result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked signed greater-or-equal compare of a 128-bit register operand
; against a 128-bit value loaded through the pointer argument, both viewed as
; <16 x i8>. The <16 x i1> result is widened to <32 x i1> (lanes 16 and up are
; taken from a zero vector) and returned as an i32 bitmask.
; With the avx512bw/vl/dq RUN line (VLX prefix) the expected code folds the
; load into vpcmpnltb with a (%rdi) memory operand, followed by kmovd. With
; only avx512f (NoVLX prefix) the expected code loads the operand with vmovdqa,
; computes vpcmpgtb of b over a, inverts it by xor with an all-ones vector,
; converts it to a 16-bit mask, and then extracts and re-inserts all sixteen
; bits through general-purpose registers, saving and restoring %rbx and
; %r12-%r15 around that sequence.
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi912:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi913:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi914:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi915:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi916:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi917:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi918:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi919:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the register argument and the loaded value as sixteen i8 lanes.
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
%2 = icmp sge <16 x i8> %0, %1
|
|
; Shuffle indices 16-31 select elements of the zeroinitializer operand, so
; result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed >= compare of two 16 x i8 vectors, returned as an i32 bitmask.
; The 16 compare bits are ANDed with the i16 mask %__u and widened to 32 lanes
; with zeros (shuffle indices 16-31 pick from the zeroinitializer operand).
; The autogenerated check lines that follow pin the expected llc output for
; the file's two feature configurations: with +avx512bw,+avx512vl a single
; masked vpcmpleb with swapped operands, and with +avx512f only a
; vpcmpgtb/invert sequence followed by a bit-by-bit rebuild of the mask.
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi920:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi921:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi922:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi923:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi924:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi925:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi926:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi927:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
; Lane-wise signed a >= b; yields one i1 per byte lane.
%2 = icmp sge <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
; Keep only the lanes whose bit is set in %__u.
%4 = and <16 x i1> %2, %3
|
|
; Widen to 32 lanes: indices 0-15 take the masked compare bits, indices 16-31
; select from the zeroinitializer operand.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Memory-operand variant of the masked signed >= byte compare: the second
; vector is loaded through the pointer %__b instead of arriving in a register.
; The 16 compare bits are ANDed with %__u, zero-extended to 32 lanes and
; returned as an i32. The autogenerated check lines expect the load to be
; folded into vpcmpnltb when +avx512bw,+avx512vl are enabled, and a separate
; vmovdqa plus the vpcmpgtb/invert expansion when only +avx512f is enabled.
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi928:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi929:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi930:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi931:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi932:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi933:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi934:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi935:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
; Right-hand operand is read from memory through %__b.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
; Lane-wise signed a >= b; yields one i1 per byte lane.
%2 = icmp sge <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
; Keep only the lanes whose bit is set in %__u.
%4 = and <16 x i1> %2, %3
|
|
; Widen to 32 lanes: indices 0-15 take the masked compare bits, indices 16-31
; select from the zeroinitializer operand.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare of two 16 x i8 vectors, returned as an i64
; bitmask. Only the low 16 result lanes carry compare bits; the shuffle fills
; the other 48 lanes from the zeroinitializer operand. The autogenerated check
; lines expect a single vpcmpleb (operands swapped) plus kmovq when
; +avx512bw,+avx512vl are enabled, and with +avx512f only a vpcmpgtb/invert
; expansion whose 32-bit halves are combined with shlq/orq.
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi936:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi937:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi938:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi939:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi940:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi941:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi942:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi943:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
; Lane-wise signed a >= b; yields one i1 per byte lane.
%2 = icmp sge <16 x i8> %0, %1
|
|
; Widen to 64 lanes: indices 0-15 take the compare bits; the remaining 48
; indices (the run 16-31, listed three times) all select from the
; zeroinitializer operand.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Memory-operand variant of the unmasked signed >= byte compare with an i64
; result: the second vector is loaded through the pointer %__b. Only the low
; 16 result lanes carry compare bits; the other 48 come from the
; zeroinitializer shuffle operand. The autogenerated check lines expect the
; load to be folded into vpcmpnltb when +avx512bw,+avx512vl are enabled, and a
; separate vmovdqa plus the vpcmpgtb/invert expansion with +avx512f only.
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi944:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi945:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi946:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi947:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi948:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi949:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi950:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi951:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
; Right-hand operand is read from memory through %__b.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
; Lane-wise signed a >= b; yields one i1 per byte lane.
%2 = icmp sge <16 x i8> %0, %1
|
|
; Widen to 64 lanes: indices 0-15 take the compare bits; the remaining 48
; indices (the run 16-31, listed three times) all select from the
; zeroinitializer operand.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed >= compare of two 16 x i8 vectors, returned as an i64 bitmask.
; The 16 compare bits are ANDed with the i16 mask %__u; the shuffle then fills
; the other 48 result lanes from the zeroinitializer operand. The
; autogenerated check lines expect a single masked vpcmpleb (operands swapped)
; plus kmovq when +avx512bw,+avx512vl are enabled, and with +avx512f only a
; vpcmpgtb/invert expansion whose 32-bit halves are combined with shlq/orq.
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi952:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi953:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi954:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi955:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi956:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi957:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi958:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi959:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
%1 = bitcast <2 x i64> %__b to <16 x i8>
|
|
; Lane-wise signed a >= b; yields one i1 per byte lane.
%2 = icmp sge <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
; Keep only the lanes whose bit is set in %__u.
%4 = and <16 x i1> %2, %3
|
|
; Widen to 64 lanes: indices 0-15 take the masked compare bits; the remaining
; 48 indices (the run 16-31, listed three times) all select from the
; zeroinitializer operand.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Memory-operand variant of the masked signed >= byte compare with an i64
; result: the second vector is loaded through the pointer %__b, the 16 compare
; bits are ANDed with %__u, and the other 48 result lanes come from the
; zeroinitializer shuffle operand. The autogenerated check lines expect the
; load to be folded into a masked vpcmpnltb when +avx512bw,+avx512vl are
; enabled, and a separate vmovdqa plus the vpcmpgtb/invert expansion with
; +avx512f only.
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi960:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi961:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi962:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi963:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi964:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi965:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi966:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi967:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <16 x i8>
|
|
; Right-hand operand is read from memory through %__b.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <16 x i8>
|
|
; Lane-wise signed a >= b; yields one i1 per byte lane.
%2 = icmp sge <16 x i8> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
; Keep only the lanes whose bit is set in %__u.
%4 = and <16 x i1> %2, %3
|
|
; Widen to 64 lanes: indices 0-15 take the masked compare bits; the remaining
; 48 indices (the run 16-31, listed three times) all select from the
; zeroinitializer operand.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed ">=" compare of two 32 x i8 register operands; the 32-lane result
; mask is widened to 64 lanes with zeros and returned as an i64.
; The VLX run expects a single vpcmpleb (operands swapped) into a mask
; register. The NoVLX run expects vpcmpgtb followed by an all-ones vpxor to
; invert it, then one vpmovsxbd/vpslld/vptestmd sequence per 128-bit half,
; with the mask words assembled through stack slots.
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi968:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi969:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi970:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%1 = bitcast <4 x i64> %__b to <32 x i8>
|
|
%2 = icmp sge <32 x i8> %0, %1
|
|
; Indices 32-63 select lanes of the zeroinitializer operand, so the upper
; 32 mask bits are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Signed ">=" compare of a 32 x i8 register operand against a 32 x i8 value
; loaded through %__b; the 32-lane result mask is widened to 64 lanes with
; zeros and returned as an i64.
; The VLX run expects the load folded into a single vpcmpnltb. The NoVLX run
; expects an explicit vmovdqa load, vpcmpgtb plus an all-ones vpxor to invert
; it, and one vpmovsxbd/vpslld/vptestmd sequence per 128-bit half.
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi971:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi972:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi973:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <32 x i8>
|
|
%2 = icmp sge <32 x i8> %0, %1
|
|
; Indices 32-63 select lanes of the zeroinitializer operand, so the upper
; 32 mask bits are zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed ">=" compare of two 32 x i8 register operands: the compare
; result is ANDed with the 32-bit mask %__u, widened to 64 lanes with zeros
; and returned as an i64.
; The VLX run expects kmovd of the mask into %k1 and a single write-masked
; vpcmpleb. The NoVLX run expects %edi to be spilled and reloaded as two
; 16-bit mask words, each expanded to a byte vector (zero-masked vpternlogd
; + vpmovdb) and vpand-ed with one 128-bit half of the inverted vpcmpgtb
; result before the vptestmd conversion back to a mask.
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi974:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi975:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi976:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
|
|
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%1 = bitcast <4 x i64> %__b to <32 x i8>
|
|
%2 = icmp sge <32 x i8> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per lane and apply it to the result.
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
; Indices 32-63 select lanes of the zeroinitializer operand, so the upper
; 32 mask bits are zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked signed ">=" compare of a 32 x i8 register operand against a
; 32 x i8 value loaded through %__b: the compare result is ANDed with the
; 32-bit mask %__u, widened to 64 lanes with zeros and returned as an i64.
; The VLX run expects kmovd of the mask into %k1 and a single write-masked
; vpcmpnltb with the load folded in. The NoVLX run expects the mask to be
; spilled, reloaded as two 16-bit words and expanded to byte vectors, an
; explicit vmovdqa load, and vpand of each mask vector with one 128-bit half
; of the inverted vpcmpgtb result.
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi977:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi978:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi979:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
|
|
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
|
|
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <32 x i8>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <32 x i8>
|
|
%2 = icmp sge <32 x i8> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per lane and apply it to the result.
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
; Indices 32-63 select lanes of the zeroinitializer operand, so the upper
; 32 mask bits are zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed ">=" compare of two 8 x i16 register operands; the 8-lane result
; mask is widened to 16 lanes with zeros and returned as an i16.
; The VLX run expects a single vpcmplew (operands swapped) into a mask
; register. The NoVLX run expects vpcmpgtw plus an all-ones vpxor to invert
; it, a vpmovsxwq/vpsllq/vptestmq conversion to a mask, and kunpckbw with a
; zeroed mask register to form the 16-bit result.
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so mask bits
; 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Signed ">=" compare of an 8 x i16 register operand against an 8 x i16
; value loaded through %__b; the 8-lane result mask is widened to 16 lanes
; with zeros and returned as an i16.
; The VLX run expects the load folded into a single vpcmpnltw. The NoVLX run
; expects an explicit vmovdqa load, vpcmpgtw plus an all-ones vpxor to invert
; it, a vpmovsxwq/vpsllq/vptestmq conversion to a mask, and kunpckbw with a
; zeroed mask register.
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so mask bits
; 8-15 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed ">=" compare of two 8 x i16 register operands: the compare
; result is ANDed with the 8-bit mask %__u, widened to 16 lanes with zeros
; and returned as an i16.
; The VLX run expects kmovd of the mask into %k1 and a single write-masked
; vpcmplew. The NoVLX run expects the inverted vpcmpgtw result to be widened
; (vpmovsxwq/vpsllq) and converted by a vptestmq that is itself write-masked
; with %k1, followed by kunpckbw with a zeroed mask register.
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per lane and apply it to the result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so mask bits
; 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked signed ">=" compare of an 8 x i16 register operand against an
; 8 x i16 value loaded through %__b: the compare result is ANDed with the
; 8-bit mask %__u, widened to 16 lanes with zeros and returned as an i16.
; The VLX run expects kmovd of the mask into %k1 and a single write-masked
; vpcmpnltw with the load folded in. The NoVLX run expects an explicit
; vmovdqa load, the inverted vpcmpgtw result widened (vpmovsxwq/vpsllq) and
; converted by a vptestmq write-masked with %k1, then kunpckbw with a zeroed
; mask register.
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per lane and apply it to the result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so mask bits
; 8-15 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed ">=" compare of two 8 x i16 register operands; the 8-lane result
; mask is widened to 32 lanes with zeros and returned as an i32.
; The VLX run expects a single vpcmplew (operands swapped) into a mask
; register. The NoVLX run expects the inverted vpcmpgtw result to be turned
; into a mask by vptestmq, each of the 8 mask bits to be extracted with
; kshiftlw/kshiftrw/kmovw and re-inserted into a byte vector with vpinsrb,
; and that vector to be converted back to a mask (vpmovsxbd/vpslld/vptestmd)
; which is stored to the stack and reloaded as the 32-bit return value.
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi980:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi981:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi982:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Every index above 7 (8-15, repeated) selects a lane of the zeroinitializer
; operand, so mask bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Signed ">=" compare of an 8 x i16 register operand against an 8 x i16
; value loaded through %__b; the 8-lane result mask is widened to 32 lanes
; with zeros and returned as an i32.
; The VLX run expects the load folded into a single vpcmpnltw. The NoVLX run
; expects an explicit vmovdqa load, the inverted vpcmpgtw result turned into
; a mask by vptestmq, each of the 8 mask bits extracted with
; kshiftlw/kshiftrw/kmovw and re-inserted with vpinsrb, and the rebuilt
; vector converted back to a mask that is stored to the stack and reloaded
; as the 32-bit return value.
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi983:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi984:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi985:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Every index above 7 (8-15, repeated) selects a lane of the zeroinitializer
; operand, so mask bits 8-31 are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed ">=" compare of two 8 x i16 register operands: the compare
; result is ANDed with the 8-bit mask %__u, widened to 32 lanes with zeros
; and returned as an i32.
; The VLX run expects kmovd of the mask into %k1 and a single write-masked
; vpcmplew. The NoVLX run expects the inverted vpcmpgtw result converted by
; a vptestmq write-masked with %k1, each of the 8 mask bits extracted with
; kshiftlw/kshiftrw/kmovw and re-inserted with vpinsrb, and the rebuilt
; vector converted back to a mask that is stored to the stack and reloaded
; as the 32-bit return value.
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi986:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi987:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi988:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per lane and apply it to the result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Every index above 7 (8-15, repeated) selects a lane of the zeroinitializer
; operand, so mask bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked signed ">=" compare of an 8 x i16 register operand against an
; 8 x i16 value loaded through %__b: the compare result is ANDed with the
; 8-bit mask %__u, widened to 32 lanes with zeros and returned as an i32.
; The VLX run expects kmovd of the mask into %k1 and a single write-masked
; vpcmpnltw with the load folded in. The NoVLX run expects an explicit
; vmovdqa load, the inverted vpcmpgtw result converted by a vptestmq
; write-masked with %k1, each of the 8 mask bits extracted with
; kshiftlw/kshiftrw/kmovw and re-inserted with vpinsrb, and the rebuilt
; vector converted back to a mask that is stored to the stack and reloaded
; as the 32-bit return value.
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi989:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi990:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi991:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Reinterpret the scalar mask as one i1 per lane and apply it to the result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Every index above 7 (8-15, repeated) selects a lane of the zeroinitializer
; operand, so mask bits 8-31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed ">=" compare of two <8 x i16> vectors (each bitcast from <2 x i64>).
; The 8-lane i1 result is widened to 64 lanes, with the extra lanes taken from
; a zero vector, and returned as an i64 bit mask.
; Expected code under the VLX prefix is one vpcmplew (operands swapped) into a
; mask register followed by kmovq. Under the NoVLX prefix the compare is done
; in vector registers (vpcmpgtw + invert) and the mask bits are pulled out one
; at a time with kshiftlw/kshiftrw/kmovw and reassembled with vpinsrb.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi992:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi993:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi994:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Result lanes 0-7 copy the compare result; every shuffle index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Memory-operand variant: signed ">=" compare of %__a against a <2 x i64>
; loaded from %__b, both viewed as <8 x i16>. The 8-lane i1 result is widened
; to 64 lanes with zero lanes appended and returned as an i64 bit mask.
; Under the VLX prefix the load is expected to fold into the compare
; (vpcmpnltw with a (%rdi) operand). Under the NoVLX prefix the value is first
; loaded with vmovdqa, compared in vector registers, and the mask bits are
; extracted and reassembled one at a time.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi995:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi996:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi997:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Result lanes 0-7 copy the compare result; every shuffle index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked variant: signed ">=" compare of two <8 x i16> vectors (each bitcast
; from <2 x i64>), ANDed lane-wise with the bits of %__u. The 8-lane i1 result
; is widened to 64 lanes with zero lanes appended and returned as an i64.
; Under the VLX prefix %__u is expected to be moved into %k1 and used as the
; write mask of a single vpcmplew. Under the NoVLX prefix the mask is applied
; on the vptestmq that converts the vector compare result into a mask
; register, after which the bits are extracted and reassembled one at a time.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi998:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi999:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1000:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Reinterpret the 8 bits of %__u as one i1 per lane and use them to mask the
; compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Result lanes 0-7 copy the masked compare; every shuffle index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked memory-operand variant: signed ">=" compare of %__a against a
; <2 x i64> loaded from %__b (both viewed as <8 x i16>), ANDed lane-wise with
; the bits of %__u. The 8-lane i1 result is widened to 64 lanes with zero lanes
; appended and returned as an i64.
; Under the VLX prefix the expected code moves %__u into %k1 and issues one
; masked vpcmpnltw with the load folded in as a (%rsi) operand. Under the
; NoVLX prefix the operand is loaded with vmovdqa, compared in vector
; registers, masked on the vptestmq, and rebuilt one mask bit at a time.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1001:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1002:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1003:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp sge <8 x i16> %0, %1
|
|
; Reinterpret the 8 bits of %__u as one i1 per lane and use them to mask the
; compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Result lanes 0-7 copy the masked compare; every shuffle index in 8-15 picks a
; lane of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed ">=" compare of two <16 x i16> vectors (each bitcast from <4 x i64>).
; The 16-lane i1 result is widened to 32 lanes, with the extra lanes taken
; from a zero vector, and returned as an i32 bit mask.
; Expected code under the VLX prefix is one ymm vpcmplew (operands swapped)
; into a mask register followed by kmovd. Under the NoVLX prefix the compare
; is done in vector registers (vpcmpgtw + invert), the 16 mask bits are pulled
; out one at a time with kshiftlw/kshiftrw/kmovw and reassembled with
; vmovd/vpinsrb; that sequence also saves and restores %rbx and %r12-%r15.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1004:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1005:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1006:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1007:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1008:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1009:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1010:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1011:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
; Result lanes 0-15 copy the compare result; shuffle indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Memory-operand variant: signed ">=" compare of %__a against a <4 x i64>
; loaded from %__b, both viewed as <16 x i16>. The 16-lane i1 result is widened
; to 32 lanes with zero lanes appended and returned as an i32 bit mask.
; Under the VLX prefix the load is expected to fold into the compare
; (vpcmpnltw with a (%rdi) operand). Under the NoVLX prefix the value is first
; loaded with vmovdqa, compared in vector registers, and the 16 mask bits are
; extracted and reassembled one at a time; that sequence also saves and
; restores %rbx and %r12-%r15.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1012:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1013:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1014:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1015:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1016:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1017:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1018:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1019:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
; Result lanes 0-15 copy the compare result; shuffle indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked variant: signed ">=" compare of two <16 x i16> vectors (each bitcast
; from <4 x i64>), ANDed lane-wise with the bits of %__u. The 16-lane i1 result
; is widened to 32 lanes with zero lanes appended and returned as an i32.
; Under the VLX prefix %__u is expected to be moved into %k1 and used as the
; write mask of a single ymm vpcmplew. Under the NoVLX prefix the mask is
; applied on the vptestmd that converts the vector compare result into a mask
; register, after which the 16 bits are extracted and reassembled one at a
; time; that sequence also saves and restores %rbx and %r12-%r15.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1020:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1021:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1022:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1023:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1024:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1025:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1026:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1027:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
; Reinterpret the 16 bits of %__u as one i1 per lane and use them to mask the
; compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Result lanes 0-15 copy the masked compare; shuffle indices 16-31 pick lanes
; of the zeroinitializer operand, so result lanes 16-31 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1028:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1029:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1030:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1031:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1032:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1033:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1034:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1035:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed >= compare of %__a and %__b, both viewed as <16 x i16>. The <16 x i1>
; result is widened to <64 x i1> (the upper 48 lanes come from zeroinitializer)
; and returned as an i64 marked zeroext.
; Expected codegen, per the autogenerated assertions that follow (see the NOTE
; at the top of the file): the VLX run gets a single vpcmplew into %k0 plus a
; kmovq. The NoVLX run compares with vpcmpgtw and inverts (vpcmpeqd/vpxor),
; forms a k-mask with vpmovsxwd/vpslld/vptestmd, extracts the 16 mask bits one
; by one, rebuilds them through vpinsrb, and assembles the 64-bit result from
; stack slots.
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1036:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1037:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1038:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1039:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1040:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1041:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1042:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1043:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
; Shuffle indices 0-15 select the compare lanes; every index >= 16 selects a
; lane of the zeroinitializer operand, so bits 16-63 of the result are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Signed >= compare of %__a against a <4 x i64> loaded from %__b, both viewed
; as <16 x i16>. The <16 x i1> result is widened to <64 x i1> (the upper 48
; lanes come from zeroinitializer) and returned as an i64 marked zeroext.
; Expected codegen, per the autogenerated assertions that follow (see the NOTE
; at the top of the file): the VLX run folds the load into vpcmpnltw's memory
; operand and moves %k0 out with kmovq. The NoVLX run loads with vmovdqa,
; compares with vpcmpgtw and inverts (vpcmpeqd/vpxor), forms a k-mask with
; vpmovsxwd/vpslld/vptestmd, extracts the 16 mask bits one by one, rebuilds
; them through vpinsrb, and assembles the 64-bit result from stack slots.
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1044:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1045:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1046:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1047:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1048:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1049:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1050:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1051:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
; Shuffle indices 0-15 select the compare lanes; every index >= 16 selects a
; lane of the zeroinitializer operand, so bits 16-63 of the result are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed >= compare of %__a and %__b, both viewed as <16 x i16>. The
; <16 x i1> compare result is ANDed with %__u reinterpreted as <16 x i1>, then
; widened to <64 x i1> (the upper 48 lanes come from zeroinitializer) and
; returned as an i64 marked zeroext.
; Expected codegen, per the autogenerated assertions that follow (see the NOTE
; at the top of the file): the VLX run moves %edi into %k1 and issues one
; vpcmplew write-masked by %k1, then kmovq. The NoVLX run compares with
; vpcmpgtw and inverts (vpcmpeqd/vpxor), applies the mask on the vptestmd that
; forms the k-mask, extracts the 16 mask bits one by one, rebuilds them through
; vpinsrb, and assembles the 64-bit result from stack slots.
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1052:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1053:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1054:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1055:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1056:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1057:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1058:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1059:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
; The scalar mask %__u becomes one i1 per lane and gates the compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 0-15 select the masked compare lanes; every index >= 16
; selects a lane of the zeroinitializer operand, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked signed >= compare of %__a against a <4 x i64> loaded from %__b, both
; viewed as <16 x i16>. The <16 x i1> compare result is ANDed with %__u
; reinterpreted as <16 x i1>, then widened to <64 x i1> (the upper 48 lanes
; come from zeroinitializer) and returned as an i64 marked zeroext.
; Expected codegen, per the autogenerated assertions that follow (see the NOTE
; at the top of the file): the VLX run moves %edi into %k1 and folds the load
; into a vpcmpnltw write-masked by %k1, then kmovq. The NoVLX run loads with
; vmovdqa, compares with vpcmpgtw and inverts (vpcmpeqd/vpxor), applies the
; mask on the vptestmd that forms the k-mask, extracts the 16 mask bits one by
; one, rebuilds them through vpinsrb, and assembles the 64-bit result from
; stack slots.
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1060:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1061:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1062:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1063:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1064:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1065:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1066:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1067:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp sge <16 x i16> %0, %1
|
|
; The scalar mask %__u becomes one i1 per lane and gates the compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 0-15 select the masked compare lanes; every index >= 16
; selects a lane of the zeroinitializer operand, so result bits 16-63 are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1068:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1069:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1070:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm3, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm2, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm7, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm6, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm8, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%1 = bitcast <8 x i64> %__b to <32 x i16>
|
|
%2 = icmp sge <32 x i16> %0, %1
|
|
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Signed greater-or-equal compare of 32 x i16 lanes, second operand loaded from
; memory, with the 32-bit result mask widened to a 64-bit integer.
;
; The IR at the bottom of this function bitcasts %__a and the value loaded from
; %__b to <32 x i16>, compares them with icmp sge, pads the <32 x i1> result
; with 32 zero lanes via shufflevector, and returns the bits as an i64.
;
; Expected code for the run using prefix VLX (see the RUN lines at the top of
; the file): the load is folded into a single vpcmpnltw with a (%rdi) memory
; operand and the mask register is moved to %rax with kmovq.
;
; Expected code for the run using prefix NoVLX: the word vectors are rebuilt
; through general-purpose registers, each 256-bit half is compared with
; vpcmpgtw (memory operand as the left-hand side, i.e. b > a) and then inverted
; by xor with an all-ones register to obtain a >= b, each half is turned into
; a 16-bit k-mask, and the masks are spilled to the stack and combined with
; shlq/orq into %rax.
;
; The assertion lines below are autogenerated (see the NOTE in the file
; header); regenerate them with the update script rather than editing by hand.
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1071:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1072:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1073:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm2, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm1
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
|
|
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
|
|
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; View the 512-bit register operand as 32 lanes of i16.
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
; The second operand is loaded from memory so that the backend has a load it
; can fold into the compare (see the (%rdi) operand in the expected output).
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <32 x i16>
|
|
; Lane-wise signed a >= b, producing a <32 x i1> mask.
%2 = icmp sge <32 x i16> %0, %1
|
|
; Widen the mask to 64 lanes: indices 0-31 select the compare result and
; indices 32-63 select lanes of the zeroinitializer operand.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
; Reinterpret the 64 mask bits as the i64 return value.
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1074:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1075:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1076:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm2, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm3
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm3, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm6, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm7, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm5, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm8, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
|
|
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
|
|
; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
|
|
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%1 = bitcast <8 x i64> %__b to <32 x i16>
|
|
%2 = icmp sge <32 x i16> %0, %1
|
|
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked signed ">=" compare of 32 x i16 lanes where the right-hand operand is
; loaded from memory (%__b). The <32 x i1> compare result is ANDed with the
; mask bits of %__u, widened to 64 lanes with zeros in the upper half, and
; returned as an i64.
; Under the VLX prefix (run with +avx512bw,+avx512vl) a single masked
; vpcmpnltw with a memory operand is expected. Under the NoVLX prefix (run with
; +avx512f only) the compare is expected to be expanded through 256-bit
; vpcmpgtw/vpxor followed by per-bit mask extraction.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1077:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1078:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1079:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
; NoVLX-NEXT: vmovdqa (%rsi), %ymm4
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3
; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp sge <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  ; Shuffle indices 32-63 select from the zeroinitializer operand, so the
  ; upper 32 bits of the returned mask are zero.
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed ">=" compare of 4 x i32 lanes with both operands in
; registers. The <4 x i1> result is widened to 8 lanes with zeros in the upper
; half and returned as an i8.
; Under the VLX prefix (run with +avx512vl) a single vpcmpled with swapped
; operands is expected. Under the NoVLX prefix (run with +avx512f only) the
; compare is expected as vpcmpgtd + vpxor with all-ones, followed by a
; lane-by-lane rebuild of the mask through 512-bit permutes.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  ; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
  ; four bits of the returned mask are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Unmasked signed ">=" compare of 4 x i32 lanes where the right-hand operand
; is loaded from memory (%__b). The <4 x i1> result is widened to 8 lanes with
; zeros in the upper half and returned as an i8.
; Under the VLX prefix (run with +avx512vl) a single vpcmpnltd with a memory
; operand is expected. Under the NoVLX prefix (run with +avx512f only) the
; operand is loaded with vmovdqa and the compare is expected as
; vpcmpgtd + vpxor with all-ones, followed by a lane-by-lane mask rebuild.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  ; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
  ; four bits of the returned mask are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Masked signed ">=" compare of 4 x i32 lanes with both operands in registers.
; The low four bits of the i8 mask %__u are ANDed with the <4 x i1> compare
; result, which is then widened to 8 lanes with zeros in the upper half and
; returned as an i8.
; Under the VLX prefix (run with +avx512vl) a single masked vpcmpled with
; swapped operands is expected. Under the NoVLX prefix (run with +avx512f
; only) the mask bits are expected to be extracted one at a time into a
; vector, combined with the vpcmpgtd result through vpandn, and the final mask
; rebuilt lane by lane.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Only the low four mask bits take part in the AND.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
  ; four bits of the returned mask are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
; Masked signed ">=" compare of 4 x i32 lanes where the right-hand operand is
; loaded from memory (%__b). The low four bits of the i8 mask %__u are ANDed
; with the <4 x i1> compare result, which is then widened to 8 lanes with
; zeros in the upper half and returned as an i8.
; Under the VLX prefix (run with +avx512vl) a single masked vpcmpnltd with a
; memory operand is expected. Under the NoVLX prefix (run with +avx512f only)
; the operand is loaded with vmovdqa, the mask bits are expected to be
; extracted one at a time into a vector, combined with the vpcmpgtd result
; through vpandn, and the final mask rebuilt lane by lane.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Only the low four mask bits take part in the AND.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
  ; four bits of the returned mask are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Unmasked signed ">=" compare of 4 x i32 lanes against a scalar i32 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; 8 lanes with zeros in the upper half and returned as an i8.
; Under the VLX prefix (run with +avx512vl) vpbroadcastd followed by a
; vpcmpled with swapped operands is expected. Under the NoVLX prefix (run with
; +avx512f only) the broadcast is followed by vpcmpgtd + vpxor with all-ones
; and a lane-by-lane rebuild of the mask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert it into lane 0, then shuffle lane 0 into
  ; every lane.
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  ; Shuffle indices 4-7 select from the zeroinitializer operand, so the upper
  ; four bits of the returned mask are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Masked signed >= compare, broadcast-from-memory form, 4 lanes widened to i8.
; %__a is reinterpreted as <4 x i32> and compared (icmp sge) against the i32
; loaded from %__b after it is splatted to every lane. The low four bits of
; %__u gate the result, which is padded with four zero lanes and returned as i8.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Load one i32 and splat it to all four lanes (the broadcast operand).
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Keep only bits 0-3 of %__u and AND them into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Indices 4-7 select from the zeroinitializer operand, so lanes 4-7 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare of two register operands, 4 lanes widened to i16.
; Both <2 x i64> arguments are reinterpreted as <4 x i32>, compared with
; icmp sge, and the <4 x i1> result is padded with twelve zero lanes before
; being returned as an i16 bitmask.
; In the AVX512VL run the expected code is a single vpcmpled into a mask
; register; in the AVX512F-only run the expected code inverts a vpcmpgtd
; result and inserts the four lanes into a 512-bit mask one at a time.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked signed >= compare with a full-vector memory operand, 4 lanes
; widened to i16. %__a (as <4 x i32>) is compared with icmp sge against the
; <2 x i64> loaded from %__b (also viewed as <4 x i32>); the <4 x i1> result is
; padded with twelve zero lanes and returned as an i16 bitmask.
; In the AVX512VL run the load is expected to fold into vpcmpnltd; in the
; AVX512F-only run it is expected as a separate vmovdqa.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Second operand comes from memory as a whole 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed >= compare of two register operands, 4 lanes widened to i16.
; %__a and %__b are reinterpreted as <4 x i32> and compared with icmp sge; the
; low four bits of %__u are ANDed into the <4 x i1> result, which is padded
; with twelve zero lanes and returned as an i16 bitmask.
; In the AVX512VL run the mask is expected as a write-mask on vpcmpled; in the
; AVX512F-only run the mask bits are expected to be moved into vector lanes
; and combined with the inverted vpcmpgtd result through vpandn.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Keep only bits 0-3 of %__u and AND them into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked signed >= compare with a full-vector memory operand, 4 lanes widened
; to i16. %__a (as <4 x i32>) is compared with icmp sge against the <2 x i64>
; loaded from %__b; the low four bits of %__u are ANDed into the result, which
; is padded with twelve zero lanes and returned as an i16 bitmask.
; In the AVX512VL run the load and the mask are both expected to fold into one
; vpcmpnltd; the AVX512F-only run is expected to load, compare and rebuild the
; mask with separate instructions.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Second operand comes from memory as a whole 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Keep only bits 0-3 of %__u and AND them into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare, broadcast-from-memory form, 4 lanes widened to
; i16. %__a (as <4 x i32>) is compared with icmp sge against the i32 loaded
; from %__b after it is splatted to every lane; the <4 x i1> result is padded
; with twelve zero lanes and returned as an i16 bitmask.
; Both runs are expected to materialise the splat with vpbroadcastd from memory.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Load one i32 and splat it to all four lanes (the broadcast operand).
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed >= compare, broadcast-from-memory form, 4 lanes widened to
; i16. %__a (as <4 x i32>) is compared with icmp sge against the i32 loaded
; from %__b after it is splatted to every lane; the low four bits of %__u are
; ANDed into the result, which is padded with twelve zero lanes and returned
; as an i16 bitmask.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Load one i32 and splat it to all four lanes (the broadcast operand).
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Keep only bits 0-3 of %__u and AND them into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-15 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare of two register operands, 4 lanes widened to i32.
; Both <2 x i64> arguments are reinterpreted as <4 x i32>, compared with
; icmp sge, and the <4 x i1> result is padded with 28 zero lanes before being
; returned as an i32 bitmask.
; In the AVX512F-only run the expected code sets up a 32-byte-aligned frame,
; stores two 16-bit masks to the stack and reloads them with one 32-bit load.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1080:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1081:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1082:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-31 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked signed >= compare with a full-vector memory operand, 4 lanes
; widened to i32. %__a (as <4 x i32>) is compared with icmp sge against the
; <2 x i64> loaded from %__b; the <4 x i1> result is padded with 28 zero lanes
; and returned as an i32 bitmask.
; In the AVX512VL run the load is expected to fold into vpcmpnltd; the
; AVX512F-only run is expected to go through an aligned stack slot.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1083:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1084:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1085:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Second operand comes from memory as a whole 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-31 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked signed >= compare of two register operands, 4 lanes widened to i32.
; %__a and %__b are reinterpreted as <4 x i32> and compared with icmp sge; the
; low four bits of %__u are ANDed into the <4 x i1> result, which is padded
; with 28 zero lanes and returned as an i32 bitmask.
; In the AVX512VL run the mask is expected as a write-mask on vpcmpled; in the
; AVX512F-only run the mask bits are expected to be moved into vector lanes,
; combined through vpandn, and the result assembled through the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1086:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1087:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1088:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Keep only bits 0-3 of %__u and AND them into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-31 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked signed >= compare with a full-vector memory operand, 4 lanes widened
; to i32. %__a (as <4 x i32>) is compared with icmp sge against the <2 x i64>
; loaded from %__b; the low four bits of %__u are ANDed into the result, which
; is padded with 28 zero lanes and returned as an i32 bitmask.
; In the AVX512VL run the load and the mask are both expected to fold into one
; vpcmpnltd; the AVX512F-only run is expected to load, compare, apply the mask
; in vector lanes, and assemble the result through the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1089:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1090:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1091:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Second operand comes from memory as a whole 128-bit vector.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Keep only bits 0-3 of %__u and AND them into the compare result.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-31 are 0.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare, broadcast-from-memory form, 4 lanes widened to
; i32. %__a (as <4 x i32>) is compared with icmp sge against the i32 loaded
; from %__b after it is splatted to every lane; the <4 x i1> result is padded
; with 28 zero lanes and returned as an i32 bitmask.
; Both runs are expected to materialise the splat with vpbroadcastd from
; memory; the AVX512F-only run additionally assembles the result on the stack.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1092:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1093:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1094:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Load one i32 and splat it to all four lanes (the broadcast operand).
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i32> %0, %1
|
|
; Every index >= 4 selects from the zeroinitializer operand, so lanes 4-31 are 0.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked variant: signed >= compare of %__a (viewed as <4 x i32>) against an
; i32 loaded from %__b and splatted to all four lanes. The result is ANDed with
; the low four bits of %__u (elements 0-3 of its <8 x i1> view), then widened
; to 32 bits with zero lanes (shuffle indices 4-7 select from zeroinitializer)
; and returned as an i32.
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1095:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1096:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1097:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed >= compare of %__a against %__b, both viewed as <4 x i32>. The 4-bit
; result is widened to 64 bits; shuffle indices 4-7 select lanes of the
; zeroinitializer operand, so every bit above bit 3 is zero. The widened mask
; is returned as an i64.
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1098:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1099:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1100:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Signed >= compare of %__a against a <2 x i64> vector loaded from %__b, both
; viewed as <4 x i32>. The 4-bit result is widened to 64 bits with zero lanes
; (shuffle indices 4-7 select from zeroinitializer) and returned as an i64.
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1101:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1102:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1103:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked variant: signed >= compare of %__a against %__b, both viewed as
; <4 x i32>. The result is ANDed with the low four bits of %__u (elements 0-3
; of its <8 x i1> view), then widened to 64 bits with zero lanes (shuffle
; indices 4-7 select from zeroinitializer) and returned as an i64.
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1104:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1105:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1106:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked variant: signed >= compare of %__a against a <2 x i64> vector loaded
; from %__b, both viewed as <4 x i32>. The result is ANDed with the low four
; bits of %__u (elements 0-3 of its <8 x i1> view), then widened to 64 bits
; with zero lanes (shuffle indices 4-7 select from zeroinitializer) and
; returned as an i64.
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1107:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1108:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1109:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Signed >= compare of %__a (viewed as <4 x i32>) against an i32 loaded from
; %__b and splatted to all four lanes. The 4-bit result is widened to 64 bits
; with zero lanes (shuffle indices 4-7 select from zeroinitializer) and
; returned as an i64.
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1110:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1111:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1112:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked variant: signed >= compare of %__a (viewed as <4 x i32>) against an
; i32 loaded from %__b and splatted to all four lanes. The result is ANDed with
; the low four bits of %__u (elements 0-3 of its <8 x i1> view), then widened
; to 64 bits with zero lanes (shuffle indices 4-7 select from zeroinitializer)
; and returned as an i64.
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1113:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1114:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1115:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Signed >= compare of %__a against %__b, both viewed as <8 x i32>. The 8-bit
; result is widened to 16 bits; shuffle indices 8-15 select lanes of the
; zeroinitializer operand, so the upper eight bits are zero. The widened mask
; is returned as an i16.
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Signed >= compare of %__a against a <4 x i64> vector loaded from %__b, both
; viewed as <8 x i32>. The 8-bit result is widened to 16 bits with zero lanes
; (shuffle indices 8-15 select from zeroinitializer) and returned as an i16.
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked variant: signed >= compare of %__a against %__b, both viewed as
; <8 x i32>. The result is ANDed with %__u viewed as <8 x i1>, then widened to
; 16 bits with zero lanes (shuffle indices 8-15 select from zeroinitializer)
; and returned as an i16.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked variant: signed >= compare of %__a against a <4 x i64> vector loaded
; from %__b, both viewed as <8 x i32>. The result is ANDed with %__u viewed as
; <8 x i1>, then widened to 16 bits with zero lanes (shuffle indices 8-15
; select from zeroinitializer) and returned as an i16.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Signed >= compare of %__a (viewed as <8 x i32>) against an i32 loaded from
; %__b and splatted to all eight lanes. The 8-bit result is widened to 16 bits
; with zero lanes (shuffle indices 8-15 select from zeroinitializer) and
; returned as an i16.
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked variant: signed >= compare of %__a (viewed as <8 x i32>) against an
; i32 loaded from %__b and splatted to all eight lanes. The result is ANDed
; with %__u viewed as <8 x i1>, then widened to 16 bits with zero lanes
; (shuffle indices 8-15 select from zeroinitializer) and returned as an i16.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unmasked signed ">=" compare of two 8 x i32 vectors, with the 8-lane result
; widened to a 32-bit mask.
;   %__a, %__b : operands, passed as <4 x i64> and viewed as <8 x i32>
; Returns the i32 bitcast of the widened <32 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: with AVX512VL a single ymm compare into a k-register (emitted
; as a less-or-equal with the operands swapped); with AVX512F only, a zmm
; compare whose low eight mask bits are extracted one at a time and rebuilt
; through a stack slot.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unmasked signed ">=" compare of an 8 x i32 register operand against a vector
; loaded from memory, with the 8-lane result widened to a 32-bit mask.
;   %__a : left-hand operand, passed as <4 x i64> and viewed as <8 x i32>
;   %__b : pointer to the <4 x i64> right-hand operand
; Returns the i32 bitcast of the widened <32 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: with AVX512VL the load is folded into a ymm vpcmpnltd; with
; AVX512F only, the vector is loaded with vmovdqa, compared as zmm, and the low
; eight mask bits are rebuilt one at a time through a stack slot.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1119:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1120:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1121:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Right-hand operand comes from memory as a full 256-bit vector.
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed ">=" compare of two 8 x i32 vectors, with the 8-lane result
; widened to a 32-bit mask.
;   %__u       : i8 write-mask, reinterpreted as <8 x i1>
;   %__a, %__b : operands, passed as <4 x i64> and viewed as <8 x i32>
; Returns the i32 bitcast of the widened <32 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: with AVX512VL a single ymm compare under {%k1}; with AVX512F
; only, a masked zmm compare whose low eight mask bits are rebuilt one at a
; time through a stack slot.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1122:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1123:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1124:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  ; Apply the caller-supplied mask lane by lane.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked signed ">=" compare of an 8 x i32 register operand against a vector
; loaded from memory, with the 8-lane result widened to a 32-bit mask.
;   %__u : i8 write-mask, reinterpreted as <8 x i1>
;   %__a : left-hand operand, passed as <4 x i64> and viewed as <8 x i32>
;   %__b : pointer to the <4 x i64> right-hand operand
; Returns the i32 bitcast of the widened <32 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: with AVX512VL the load is folded into a ymm vpcmpnltd under
; {%k1}; with AVX512F only, the vector is loaded with vmovdqa, compared as zmm
; under {%k1}, and the low eight mask bits are rebuilt through a stack slot.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1125:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1126:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1127:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Right-hand operand comes from memory as a full 256-bit vector.
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  ; Apply the caller-supplied mask lane by lane.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked signed ">=" compare of eight i32 lanes against a broadcast scalar,
; with the 8-lane result widened to a 32-bit mask.
;   %__a : left-hand operand, passed as <4 x i64> and viewed as <8 x i32>
;   %__b : pointer to the i32 that is splatted to form the right-hand operand
; Returns the i32 bitcast of the widened <32 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: both configurations broadcast with vpbroadcastd; with AVX512VL
; the compare is a single ymm instruction, with AVX512F only it is a zmm compare
; whose low eight mask bits are rebuilt one at a time through a stack slot.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1128:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1129:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1130:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle with an all-zero index mask.
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i32> %0, %1
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed ">=" compare of eight i32 lanes against a broadcast scalar,
; with the 8-lane result widened to a 32-bit mask.
;   %__u : i8 write-mask, reinterpreted as <8 x i1>
;   %__a : left-hand operand, passed as <4 x i64> and viewed as <8 x i32>
;   %__b : pointer to the i32 that is splatted to form the right-hand operand
; Returns the i32 bitcast of the widened <32 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: both configurations broadcast with vpbroadcastd and compare
; under {%k1}; with AVX512VL on ymm, with AVX512F only on zmm followed by a
; bit-by-bit rebuild of the low eight mask bits through a stack slot.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1131:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1132:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1133:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Splat the loaded scalar: insert it at lane 0, then shuffle with an all-zero index mask.
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i32> %0, %1
  ; Apply the caller-supplied mask lane by lane.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unmasked signed ">=" compare of two 8 x i32 vectors, with the 8-lane result
; widened to a 64-bit mask.
;   %__a, %__b : operands, passed as <4 x i64> and viewed as <8 x i32>
; Returns the i64 bitcast of the widened <64 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: with AVX512VL a single ymm compare read back with kmovq; with
; AVX512F only, a zmm compare whose low eight mask bits are rebuilt through the
; stack, after which two 32-bit halves are combined with shlq/orq.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1134:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1135:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1136:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unmasked signed ">=" compare of an 8 x i32 register operand against a vector
; loaded from memory, with the 8-lane result widened to a 64-bit mask.
;   %__a : left-hand operand, passed as <4 x i64> and viewed as <8 x i32>
;   %__b : pointer to the <4 x i64> right-hand operand
; Returns the i64 bitcast of the widened <64 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: with AVX512VL the load is folded into a ymm vpcmpnltd and the
; mask is read back with kmovq; with AVX512F only, the vector is loaded with
; vmovdqa, compared as zmm, the low eight mask bits are rebuilt through the
; stack, and two 32-bit halves are combined with shlq/orq.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1137:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1138:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1139:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  ; Right-hand operand comes from memory as a full 256-bit vector.
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed ">=" compare of two 8 x i32 vectors, with the 8-lane result
; widened to a 64-bit mask.
;   %__u       : i8 write-mask, reinterpreted as <8 x i1>
;   %__a, %__b : operands, passed as <4 x i64> and viewed as <8 x i32>
; Returns the i64 bitcast of the widened <64 x i1>; every lane above 7 comes
; from a zeroinitializer vector.
; Expected code: with AVX512VL a single ymm compare under {%k1} read back with
; kmovq; with AVX512F only, a masked zmm compare whose low eight mask bits are
; rebuilt through the stack, after which two 32-bit halves are combined with
; shlq/orq.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  ; Apply the caller-supplied mask lane by lane.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8-15 select from the second (all-zero) operand, so only lanes 0-7 can be set.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked signed greater-or-equal compare of eight i32 lanes, second operand
; loaded from memory.
;   %__u - i8 write mask, bitcast to <8 x i1> and ANDed with the compare result
;   %__a - first operand, passed as <4 x i64> and reinterpreted as <8 x i32>
;   %__b - pointer to the second operand (<4 x i64>, reinterpreted the same way)
; The 8-bit result is widened to 64 bits by a shufflevector whose indices 8..15
; select lanes of the zeroinitializer operand, so result bits 8..63 are zero.
; With the AVX512VL feature set the checks expect a single masked vpcmpnltd
; with a folded memory operand; with only AVX512F they expect the compare to be
; done on zmm registers, followed by bit-by-bit extraction of the mask
; (kshiftlw/kshiftrw/kmovw) and reassembly through vpinsrb.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1143:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1144:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1145:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed greater-or-equal compare of eight i32 lanes against a scalar
; broadcast from memory.
;   %__a - first operand, passed as <4 x i64> and reinterpreted as <8 x i32>
;   %__b - pointer to the i32 that is splatted into all eight lanes
;          (insertelement into lane 0 followed by an all-zero shuffle mask)
; The 8-bit result is widened to 64 bits by a shufflevector whose indices 8..15
; select lanes of the zeroinitializer operand, so result bits 8..63 are zero.
; With the AVX512VL feature set the checks expect vpbroadcastd + vpcmpled on
; ymm registers; with only AVX512F they expect the compare on zmm registers,
; followed by bit-by-bit extraction of the mask (kshiftlw/kshiftrw/kmovw) and
; reassembly through vpinsrb.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1146:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1147:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1148:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed greater-or-equal compare of eight i32 lanes against a scalar
; broadcast from memory.
;   %__u - i8 write mask, bitcast to <8 x i1> and ANDed with the compare result
;   %__a - first operand, passed as <4 x i64> and reinterpreted as <8 x i32>
;   %__b - pointer to the i32 that is splatted into all eight lanes
;          (insertelement into lane 0 followed by an all-zero shuffle mask)
; The 8-bit result is widened to 64 bits by a shufflevector whose indices 8..15
; select lanes of the zeroinitializer operand, so result bits 8..63 are zero.
; With the AVX512VL feature set the checks expect vpbroadcastd + a masked
; vpcmpled on ymm registers; with only AVX512F they expect the masked compare
; on zmm registers, followed by bit-by-bit extraction of the mask
; (kshiftlw/kshiftrw/kmovw) and reassembly through vpinsrb.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1149:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1150:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1151:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed greater-or-equal compare of sixteen i32 lanes, both operands
; in registers.
;   %__a, %__b - operands, passed as <8 x i64> and reinterpreted as <16 x i32>
; The 16-bit result is widened to 32 bits by a shufflevector whose indices
; 16..31 select lanes of the zeroinitializer operand, so result bits 16..31
; are zero.
; With the AVX512VL/BW/DQ feature set the checks expect vpcmpled followed by a
; single kmovd; with only AVX512F they expect the same compare followed by
; bit-by-bit extraction of the mask (kshiftlw/kshiftrw/kmovw), reassembly
; through vpinsrb, and a round trip through the stack.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1152:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1153:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1154:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1155:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1156:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1157:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1158:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1159:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unmasked signed greater-or-equal compare of sixteen i32 lanes, second operand
; loaded from memory.
;   %__a - first operand, passed as <8 x i64> and reinterpreted as <16 x i32>
;   %__b - pointer to the second operand (<8 x i64>, reinterpreted the same way)
; The 16-bit result is widened to 32 bits by a shufflevector whose indices
; 16..31 select lanes of the zeroinitializer operand, so result bits 16..31
; are zero.
; Both run configurations expect the load to be folded into vpcmpnltd. With the
; AVX512VL/BW/DQ feature set the mask is then moved out with one kmovd; with
; only AVX512F the checks expect bit-by-bit extraction of the mask
; (kshiftlw/kshiftrw/kmovw), reassembly through vpinsrb, and a round trip
; through the stack.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1160:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1161:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1162:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1163:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1164:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1165:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1166:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1167:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed greater-or-equal compare of sixteen i32 lanes, both operands in
; registers.
;   %__u       - i16 write mask, bitcast to <16 x i1> and ANDed with the
;                compare result
;   %__a, %__b - operands, passed as <8 x i64> and reinterpreted as <16 x i32>
; The 16-bit result is widened to 32 bits by a shufflevector whose indices
; 16..31 select lanes of the zeroinitializer operand, so result bits 16..31
; are zero.
; With the AVX512VL/BW/DQ feature set the checks expect a masked vpcmpled
; followed by a single kmovd; with only AVX512F they expect the same masked
; compare followed by bit-by-bit extraction of the mask
; (kshiftlw/kshiftrw/kmovw), reassembly through vpinsrb, and a round trip
; through the stack.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1168:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1169:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1170:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1171:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1172:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1173:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1174:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1175:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked signed greater-or-equal compare of sixteen i32 lanes, second operand
; loaded from memory.
;   %__u - i16 write mask, bitcast to <16 x i1> and ANDed with the compare
;          result
;   %__a - first operand, passed as <8 x i64> and reinterpreted as <16 x i32>
;   %__b - pointer to the second operand (<8 x i64>, reinterpreted the same way)
; The 16-bit result is widened to 32 bits by a shufflevector whose indices
; 16..31 select lanes of the zeroinitializer operand, so result bits 16..31
; are zero.
; Both run configurations expect the load to be folded into a masked vpcmpnltd.
; With the AVX512VL/BW/DQ feature set the mask is then moved out with one
; kmovd; with only AVX512F the checks expect bit-by-bit extraction of the mask
; (kshiftlw/kshiftrw/kmovw), reassembly through vpinsrb, and a round trip
; through the stack.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1179:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1180:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1181:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1182:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1183:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed ">=" compare of the sixteen i32 lanes of %__a against a single i32
; loaded from %__b and splatted to every lane (broadcast-from-memory form).
; The <16 x i1> result is widened to <32 x i1> with zero lanes and returned
; as an i32 bitmask.
;
; The full-feature run expects the mask to be moved out of %k0 with a single
; kmovd. The avx512f-only run expects the sixteen mask bits to be extracted
; one at a time, reassembled in %xmm0 and round-tripped through a stack slot.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %zmm1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1184:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1185:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1186:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1187:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1188:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1189:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1190:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1191:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <16 x i32> %0, %1
  ; Indices 16-31 select lanes of the zeroinitializer operand, so mask bits
  ; 16-31 of the result are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked variant of the broadcast-from-memory signed ">=" compare: the
; sixteen i32 lanes of %__a are compared against a splat of the i32 loaded
; from %__b, and the <16 x i1> result is ANDed with the bits of %__u. The
; result is widened to <32 x i1> with zero lanes and returned as an i32
; bitmask.
;
; Both runs expect %__u to be moved into %k1 and used as the write-mask of
; the compare. The full-feature run then reads %k0 with a single kmovd; the
; avx512f-only run extracts the sixteen mask bits one at a time, reassembles
; them in %xmm0 and round-trips the mask through a stack slot.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1192:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1193:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1194:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1195:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1196:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1197:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1198:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1199:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <16 x i32> %0, %1
  ; Apply the caller-supplied lane mask to the compare result.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %3, %2
  ; Indices 16-31 select lanes of the zeroinitializer operand, so mask bits
  ; 16-31 of the result are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed ">=" compare of the sixteen i32 lanes of %__a against those of
; %__b (register-register form). The <16 x i1> result is widened to
; <64 x i1> with zero lanes and returned as an i64 bitmask.
;
; The full-feature run expects the mask to be moved out of %k0 with a single
; kmovq. The avx512f-only run expects the sixteen mask bits to be extracted
; one at a time, reassembled in %xmm0, stored to the stack, and the i64
; result to be built from two 32-bit stack loads (shlq/orq).
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1203:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1204:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1205:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1206:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1207:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  ; Every index after the first sixteen is in the range 16-31 and therefore
  ; selects a lane of the zeroinitializer operand: mask bits 16-63 are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Signed ">=" compare of the sixteen i32 lanes of %__a against a full
; 512-bit vector loaded from %__b (memory-operand form). The <16 x i1>
; result is widened to <64 x i1> with zero lanes and returned as an i64
; bitmask.
;
; Both runs expect the load to be folded into the compare (vpcmpnltd with a
; memory operand). The full-feature run then reads %k0 with a single kmovq;
; the avx512f-only run extracts the sixteen mask bits one at a time,
; reassembles them in %xmm0, stores the mask to the stack and builds the i64
; result from two 32-bit stack loads (shlq/orq).
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1208:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1209:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1210:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1211:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1212:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1213:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1214:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1215:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  ; Every index after the first sixteen is in the range 16-31 and therefore
  ; selects a lane of the zeroinitializer operand: mask bits 16-63 are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed ">=" compare of the sixteen i32 lanes of %__a against those
; of %__b (register-register form). The <16 x i1> compare result is ANDed
; with the bits of %__u, widened to <64 x i1> with zero lanes and returned
; as an i64 bitmask.
;
; Both runs expect %__u to be moved into %k1 and used as the write-mask of
; the compare. The full-feature run then reads %k0 with a single kmovq; the
; avx512f-only run extracts the sixteen mask bits one at a time, reassembles
; them in %xmm0, stores the mask to the stack and builds the i64 result from
; two 32-bit stack loads (shlq/orq).
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1216:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1217:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1218:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1219:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1220:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1221:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1222:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1223:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  ; Apply the caller-supplied lane mask to the compare result.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Every index after the first sixteen is in the range 16-31 and therefore
  ; selects a lane of the zeroinitializer operand: mask bits 16-63 are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked signed ">=" compare of the sixteen i32 lanes of %__a against a full
; 512-bit vector loaded from %__b (memory-operand form). The <16 x i1>
; compare result is ANDed with the bits of %__u, widened to <64 x i1> with
; zero lanes and returned as an i64 bitmask.
;
; Both runs expect %__u to be moved into %k1 and the load to be folded into
; a write-masked vpcmpnltd. The full-feature run then reads %k0 with a
; single kmovq; the avx512f-only run extracts the sixteen mask bits one at a
; time, reassembles them in %xmm0, stores the mask to the stack and builds
; the i64 result from two 32-bit stack loads (shlq/orq).
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1224:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1225:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1226:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1227:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1228:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1229:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1230:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1231:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp sge <16 x i32> %0, %1
  ; Apply the caller-supplied lane mask to the compare result.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Every index after the first sixteen is in the range 16-31 and therefore
  ; selects a lane of the zeroinitializer operand: mask bits 16-63 are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed >= compare of the 16 x i32 view of %__a against one i32 that
; is loaded from %__b and splatted into all 16 lanes. The <16 x i1> result is
; widened to <64 x i1> by a shufflevector whose upper 48 lanes are taken from
; zeroinitializer, and the whole thing is returned as an i64 bitmask.
; Expected code with AVX512VL/BW/DQ enabled is a broadcast, a single vpcmpled
; with swapped operands (b <= a) and a kmovq. With only AVX512F the expected
; code stores zeroed mask words to the stack, moves the 16 result bits one at
; a time through GPRs and vpinsrb, re-tests them into a mask register, and
; assembles the i64 from two 32-bit stack loads.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastd (%rdi), %zmm1
|
|
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1232:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1233:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1234:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1235:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1236:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1237:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1238:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1239:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
|
|
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <16 x i32> %0, %1
|
|
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked variant of the broadcast-from-memory 16 x i32 signed >= compare. The
; i16 %__u is bitcast to <16 x i1> and AND-ed with the compare result before
; the result is widened to <64 x i1> (upper 48 lanes from zeroinitializer) and
; returned as an i64 bitmask.
; Expected code with AVX512VL/BW/DQ enabled moves %edi into %k1 and uses it as
; the write mask of a single vpcmpled. With only AVX512F the compare is still
; masked by %k1, but the 16 result bits are then moved one at a time through
; GPRs and vpinsrb, re-tested into a mask register, and combined with zeroed
; stack words to form the i64.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastd (%rsi), %zmm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1240:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1241:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1242:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1243:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1244:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1245:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1246:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1247:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <16 x i32> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %3, %2
|
|
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is widened to <4 x i1> (lanes 2 and 3 come from zeroinitializer) and
; returned as an i4.
; Expected code with AVX512VL enabled is one vpcmpleq with swapped operands
; (b <= a) whose mask is stored with kmovb and reloaded with movzbl. With only
; AVX512F the expected code computes b > a with vpcmpgtq, inverts it by XOR
; with an all-ones register, and converts the vector back to a mask through
; vinsertps / vpslld / vptestmd.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Unmasked signed >= compare of <2 x i64> %__a against a full <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <4 x i1> (lanes 2 and 3
; come from zeroinitializer) and returned as an i4.
; Expected code with AVX512VL enabled folds the load into vpcmpnltq as a
; memory operand. With only AVX512F the vector is loaded with vmovdqa, then
; b > a is computed with vpcmpgtq, inverted by XOR with all-ones, and turned
; back into a mask through vinsertps / vpslld / vptestmd.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked signed >= compare of two <2 x i64> register operands. The i8 %__u is
; bitcast to <8 x i1>, its lanes 0 and 1 are extracted and AND-ed with the
; compare result, and the <2 x i1> value is widened to <4 x i1> (lanes 2 and 3
; from zeroinitializer) and returned as an i4.
; Expected code with AVX512VL enabled moves %edi into %k1 and uses it as the
; write mask of a single vpcmpleq. With only AVX512F the two mask bits are
; isolated with kshiftlw/kshiftrw pairs, placed into a vector with vmovd and
; vpinsrb $8, and a vpandn applies both the inversion of b > a and the mask.
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
; Masked signed >= compare of <2 x i64> %__a against a full <2 x i64> vector
; loaded from %__b. Lanes 0 and 1 of the <8 x i1> view of %__u are AND-ed with
; the compare result, which is then widened to <4 x i1> (lanes 2 and 3 from
; zeroinitializer) and returned as an i4.
; Expected code with AVX512VL enabled is a write-masked vpcmpnltq with the
; load folded as a memory operand. With only AVX512F the vector is loaded with
; vmovdqa, the two mask bits are isolated with kshiftlw/kshiftrw pairs and
; inserted into a vector, and a vpandn applies the inversion of b > a together
; with the mask.
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare of <2 x i64> %__a against one i64 that is loaded
; from %__b and splatted into both lanes. The <2 x i1> result is widened to
; <4 x i1> (lanes 2 and 3 from zeroinitializer) and returned as an i4.
; Expected code with AVX512VL enabled is vpbroadcastq followed by one vpcmpleq
; with swapped operands (b <= a). With only AVX512F the broadcast is followed
; by vpcmpgtq (b > a), an XOR with all-ones to invert it, and a conversion
; back to a mask through vinsertps / vpslld / vptestmd.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked signed >= compare of <2 x i64> %__a against one i64 that is loaded
; from %__b and splatted into both lanes. Lanes 0 and 1 of the <8 x i1> view
; of %__u are AND-ed with the compare result, which is then widened to
; <4 x i1> (lanes 2 and 3 from zeroinitializer) and returned as an i4.
; Expected code with AVX512VL enabled is vpbroadcastq, kmovd of %edi into %k1,
; and one write-masked vpcmpleq. With only AVX512F the two mask bits are
; isolated with kshiftlw/kshiftrw pairs and inserted into a vector, and a
; vpandn applies the inversion of b > a together with the mask.
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is widened to <8 x i1>; shuffle indices 2 and 3 select from the
; zeroinitializer operand, so lanes 2..7 are zero. The result is returned as
; an i8.
; Expected code with AVX512VL enabled is one vpcmpleq with swapped operands
; (b <= a) and a kmovd into %eax. With only AVX512F the expected code computes
; b > a with vpcmpgtq, inverts it by XOR with all-ones, pulls each lane out
; with vpextrb, expands the two bits with zero-masked vpternlogq, merges them
; with vpermi2q, and re-tests the vector with vpsllq / vptestmq.
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unmasked signed >= compare of <2 x i64> %__a against a full <2 x i64> vector
; loaded from %__b. The <2 x i1> result is widened to <8 x i1>; shuffle
; indices 2 and 3 select from the zeroinitializer operand, so lanes 2..7 are
; zero. The result is returned as an i8.
; Expected code with AVX512VL enabled folds the load into vpcmpnltq and moves
; the mask out with kmovd. With only AVX512F the vector is loaded with
; vmovdqa, b > a is computed and inverted, and the two lanes are rebuilt into
; a mask through vpextrb, zero-masked vpternlogq, vpermi2q and
; vpsllq / vptestmq.
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked signed >= compare of two <2 x i64> register operands. Lanes 0 and 1
; of the <8 x i1> view of %__u are AND-ed with the compare result, which is
; then widened to <8 x i1> (shuffle indices 2 and 3 select zeroinitializer,
; so lanes 2..7 are zero) and returned as an i8.
; Expected code with AVX512VL enabled moves %edi into %k1 and uses it as the
; write mask of a single vpcmpleq. With only AVX512F the two mask bits are
; isolated with kshiftlw/kshiftrw pairs and inserted into a vector, a vpandn
; applies the inversion of b > a together with the mask, and the two lanes are
; rebuilt into a mask through vpextrb, zero-masked vpternlogq, vpermi2q and
; vpsllq / vptestmq.
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked signed >= compare of <2 x i64> %__a against a full <2 x i64> vector
; loaded from %__b. Lanes 0 and 1 of the <8 x i1> view of %__u are AND-ed with
; the compare result, which is then widened to <8 x i1> (shuffle indices 2 and
; 3 select zeroinitializer, so lanes 2..7 are zero) and returned as an i8.
; Expected code with AVX512VL enabled is a write-masked vpcmpnltq with the
; load folded as a memory operand. With only AVX512F the vector is loaded with
; vmovdqa, the mask bits are isolated with kshiftlw/kshiftrw pairs and
; inserted into a vector, a vpandn applies the inversion of b > a together
; with the mask, and the two lanes are rebuilt into a mask through vpextrb,
; zero-masked vpternlogq, vpermi2q and vpsllq / vptestmq.
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked signed >= compare of <2 x i64> %__a against one i64 that is loaded
; from %__b and splatted into both lanes. The <2 x i1> result is widened to
; <8 x i1> (shuffle indices 2 and 3 select zeroinitializer, so lanes 2..7 are
; zero) and returned as an i8.
; Expected code with AVX512VL enabled is vpbroadcastq, one vpcmpleq with
; swapped operands (b <= a), and a kmovd into %eax. With only AVX512F the
; broadcast is followed by vpcmpgtq (b > a), an XOR with all-ones, and a
; rebuild of the two lanes into a mask through vpextrb, zero-masked
; vpternlogq, vpermi2q and vpsllq / vptestmq.
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked signed >= compare of <2 x i64> %__a against one i64 that is loaded
; from %__b and splatted into both lanes. Lanes 0 and 1 of the <8 x i1> view
; of %__u are AND-ed with the compare result, which is then widened to
; <8 x i1> (shuffle indices 2 and 3 select zeroinitializer, so lanes 2..7 are
; zero) and returned as an i8.
; Expected code with AVX512VL enabled is vpbroadcastq, kmovd of %edi into %k1,
; and one write-masked vpcmpleq. With only AVX512F the mask bits are isolated
; with kshiftlw/kshiftrw pairs and inserted into a vector, a vpandn applies
; the inversion of b > a together with the mask, and the two lanes are rebuilt
; into a mask through vpextrb, zero-masked vpternlogq, vpermi2q and
; vpsllq / vptestmq.
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Signed >= compare of two <2 x i64> register operands. The <2 x i1> result is
; widened to <16 x i1> by a shuffle whose indices 2 and 3 select lanes of the
; zeroinitializer operand, so every bit above bit 1 of the returned i16 is
; zero.
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Signed >= compare of %__a against a <2 x i64> vector loaded from %__b. The
; <2 x i1> result is widened to <16 x i1> by a shuffle whose indices 2 and 3
; select lanes of the zeroinitializer operand, so every bit above bit 1 of the
; returned i16 is zero.
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is ANDed with the low two lanes of %__u (viewed as <8 x i1>), then
; widened to <16 x i1> by a shuffle whose indices 2 and 3 select lanes of the
; zeroinitializer operand, so every bit above bit 1 of the returned i16 is
; zero.
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked signed >= compare of %__a against a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is ANDed with the low two lanes of %__u (viewed as
; <8 x i1>), then widened to <16 x i1> by a shuffle whose indices 2 and 3
; select lanes of the zeroinitializer operand, so every bit above bit 1 of the
; returned i16 is zero.
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Signed >= compare of %__a against a scalar i64 that is loaded from %__b and
; splatted to both lanes. The <2 x i1> result is widened to <16 x i1> by a
; shuffle whose indices 2 and 3 select lanes of the zeroinitializer operand, so
; every bit above bit 1 of the returned i16 is zero.
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked signed >= compare of %__a against a scalar i64 that is loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with the low
; two lanes of %__u (viewed as <8 x i1>), then widened to <16 x i1> by a
; shuffle whose indices 2 and 3 select lanes of the zeroinitializer operand, so
; every bit above bit 1 of the returned i16 is zero.
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Signed >= compare of two <2 x i64> register operands. The <2 x i1> result is
; widened to <32 x i1> by a shuffle whose indices 2 and 3 select lanes of the
; zeroinitializer operand, so every bit above bit 1 of the returned i32 is
; zero.
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1248:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1249:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1250:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Signed >= compare of %__a against a <2 x i64> vector loaded from %__b. The
; <2 x i1> result is widened to <32 x i1> by a shuffle whose indices 2 and 3
; select lanes of the zeroinitializer operand, so every bit above bit 1 of the
; returned i32 is zero.
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1251:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1252:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1253:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed >= compare of two <2 x i64> register operands. The <2 x i1>
; result is ANDed with the low two lanes of %__u (viewed as <8 x i1>), then
; widened to <32 x i1> by a shuffle whose indices 2 and 3 select lanes of the
; zeroinitializer operand, so every bit above bit 1 of the returned i32 is
; zero.
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1254:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1255:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1256:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked signed >= compare of %__a against a <2 x i64> vector loaded from
; %__b. The <2 x i1> result is ANDed with the low two lanes of %__u (viewed as
; <8 x i1>), then widened to <32 x i1> by a shuffle whose indices 2 and 3
; select lanes of the zeroinitializer operand, so every bit above bit 1 of the
; returned i32 is zero.
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1257:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1258:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1259:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed >= compare of %__a against a scalar i64 that is loaded from %__b and
; splatted to both lanes. The <2 x i1> result is widened to <32 x i1> by a
; shuffle whose indices 2 and 3 select lanes of the zeroinitializer operand, so
; every bit above bit 1 of the returned i32 is zero.
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1260:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1261:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1262:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed >= compare of %__a against a scalar i64 that is loaded from
; %__b and splatted to both lanes. The <2 x i1> result is ANDed with the low
; two lanes of %__u (viewed as <8 x i1>), then widened to <32 x i1> by a
; shuffle whose indices 2 and 3 select lanes of the zeroinitializer operand, so
; every bit above bit 1 of the returned i32 is zero.
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1263:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1264:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1265:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sge <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1266:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1267:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1268:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1269:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1270:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1271:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1272:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1273:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1274:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1275:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1276:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1277:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1278:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1279:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1280:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1281:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1282:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1283:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp sge <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; icmp sge on <4 x i64> register operands. The <4 x i1> result is widened to
; <8 x i1> with zero-filled upper lanes and returned as an i8 bitmask.
; Assertions cover both RUN configurations (with and without AVX512VL).
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; The bitcasts below are identity casts (same source and destination type).
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Mask indices 0-3 pick the compare bits; indices 4-7 pick zeros from the
  ; second operand, so lanes 4..7 of the widened vector are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; icmp sge on <4 x i64>, with the right-hand operand loaded from %__b.
; The <4 x i1> result is widened to <8 x i1> with zero-filled upper lanes and
; returned as an i8 bitmask. Assertions cover both RUN configurations (with
; and without AVX512VL).
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; The bitcasts below are identity casts (same source and destination type).
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Mask indices 0-3 pick the compare bits; indices 4-7 pick zeros from the
  ; second operand, so lanes 4..7 of the widened vector are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Masked icmp sge on <4 x i64> register operands. The compare result is ANDed
; with the low four bits of %__u, widened to <8 x i1> with zero-filled upper
; lanes, and returned as an i8 bitmask. Assertions cover both RUN
; configurations (with and without AVX512VL).
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; The bitcasts of %__a and %__b are identity casts (same type on both sides).
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Reinterpret %__u as eight mask bits and keep the low four as the lane mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 0-3 pick the masked compare bits; indices 4-7 pick zeros from
  ; the second operand, so lanes 4..7 of the widened vector are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
; Masked icmp sge on <4 x i64>, with the right-hand operand loaded from %__b.
; The compare result is ANDed with the low four bits of %__u, widened to
; <8 x i1> with zero-filled upper lanes, and returned as an i8 bitmask.
; Assertions cover both RUN configurations (with and without AVX512VL).
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; The vector bitcasts below are identity casts (same type on both sides).
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Reinterpret %__u as eight mask bits and keep the low four as the lane mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Mask indices 0-3 pick the masked compare bits; indices 4-7 pick zeros from
  ; the second operand, so lanes 4..7 of the widened vector are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; icmp sge on <4 x i64>, with the right-hand operand being a scalar i64 loaded
; from %__b and splatted to all four lanes. The <4 x i1> result is widened to
; <8 x i1> with zero-filled upper lanes and returned as an i8 bitmask.
; Assertions cover both RUN configurations (with and without AVX512VL).
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Identity bitcast (same source and destination type).
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  ; Mask indices 0-3 pick the compare bits; indices 4-7 pick zeros from the
  ; second operand, so lanes 4..7 of the widened vector are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Masked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is ANDed with
; elements 0-3 of %__u (viewed as <8 x i1>), padded with zero lanes to 8 lanes
; and returned as an i8.
; Expected codegen: the AVX512VL run uses a broadcast plus one masked vpcmpleq;
; the AVX512F-only run uses vpcmpgtq + invert and rebuilds the mask per lane.
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar across all four lanes.
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  ; Keep only elements 0-3 of the i8 mask and AND them with the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select lanes of the zeroinitializer operand, so the upper four
  ; result lanes are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Signed >= compare of <4 x i64> %__a against %__b. The <4 x i1> result is
; padded with zero lanes to 16 lanes and returned as an i16.
; Expected codegen: the AVX512VL run uses one vpcmpleq with swapped operands;
; the AVX512F-only run uses vpcmpgtq + invert and rebuilds the mask per lane.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-15 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is padded with zero lanes to 16 lanes and returned as an
; i16.
; Expected codegen: the AVX512VL run folds the load into one vpcmpnltq; the
; AVX512F-only run loads with vmovdqa, then uses vpcmpgtq + invert and rebuilds
; the mask per lane.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-15 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked signed >= compare of <4 x i64> %__a against %__b. The <4 x i1> result
; is ANDed with elements 0-3 of %__u (viewed as <8 x i1>), padded with zero
; lanes to 16 lanes and returned as an i16.
; Expected codegen: the AVX512VL run uses one masked vpcmpleq; the AVX512F-only
; run uses vpcmpgtq + invert, expands the mask bits into a vector for the AND,
; and rebuilds the result mask per lane.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Keep only elements 0-3 of the i8 mask and AND them with the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-15 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> result is ANDed with elements 0-3 of %__u (viewed as
; <8 x i1>), padded with zero lanes to 16 lanes and returned as an i16.
; Expected codegen: the AVX512VL run folds the load into one masked vpcmpnltq;
; the AVX512F-only run loads with vmovdqa, uses vpcmpgtq + invert, and rebuilds
; the mask per lane.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Keep only elements 0-3 of the i8 mask and AND them with the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-15 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Signed >= compare of <4 x i64> %__a against a scalar i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is padded with zero lanes to
; 16 lanes and returned as an i16.
; Expected codegen: the AVX512VL run uses a broadcast plus one vpcmpleq; the
; AVX512F-only run uses a broadcast, vpcmpgtq + invert, and rebuilds the mask
; per lane.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar across all four lanes.
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-15 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked signed >= compare of <4 x i64> %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The <4 x i1> result is ANDed with
; elements 0-3 of %__u (viewed as <8 x i1>), padded with zero lanes to 16 lanes
; and returned as an i16.
; Expected codegen: the AVX512VL run uses a broadcast plus one masked vpcmpleq;
; the AVX512F-only run uses vpcmpgtq + invert and rebuilds the mask per lane.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Splat the loaded scalar across all four lanes.
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  ; Keep only elements 0-3 of the i8 mask and AND them with the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-15 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Signed >= compare of <4 x i64> %__a against %__b. The <4 x i1> result is
; padded with zero lanes to 32 lanes and returned as an i32.
; Expected codegen: the AVX512VL run uses one vpcmpleq with swapped operands;
; the AVX512F-only run uses vpcmpgtq + invert, then builds the 32-bit result
; from two 16-bit mask stores to an aligned stack slot.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1284:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1285:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1286:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-31 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from %__b.
; The <4 x i1> result is padded with zero lanes to 32 lanes and returned as an
; i32.
; Expected codegen: the AVX512VL run folds the load into one vpcmpnltq; the
; AVX512F-only run loads with vmovdqa, uses vpcmpgtq + invert, then builds the
; 32-bit result from two 16-bit mask stores to an aligned stack slot.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1287:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1288:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1289:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-31 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed >= compare of <4 x i64> %__a against %__b. The <4 x i1> result
; is ANDed with elements 0-3 of %__u (viewed as <8 x i1>), padded with zero
; lanes to 32 lanes and returned as an i32.
; Expected codegen: the AVX512VL run uses one masked vpcmpleq; the AVX512F-only
; run uses vpcmpgtq + invert, expands the mask bits into a vector for the AND,
; then builds the 32-bit result from two 16-bit mask stores to the stack.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1290:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1291:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1292:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Keep only elements 0-3 of the i8 mask and AND them with the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-31 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked signed >= compare of <4 x i64> %__a against a <4 x i64> loaded from
; %__b. The <4 x i1> result is ANDed with elements 0-3 of %__u (viewed as
; <8 x i1>), padded with zero lanes to 32 lanes and returned as an i32.
; Expected codegen: the AVX512VL run folds the load into one masked vpcmpnltq;
; the AVX512F-only run loads with vmovdqa, uses vpcmpgtq + invert, then builds
; the 32-bit result from two 16-bit mask stores to the stack.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1293:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1294:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1295:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  ; Keep only elements 0-3 of the i8 mask and AND them with the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Every index >= 4 selects a lane of the zeroinitializer operand, so result
  ; lanes 4-31 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed >= compare of the four i64 lanes of %__a against a single i64 loaded
; from %__b and splatted to every lane. The <4 x i1> result is widened to
; <32 x i1> with all added lanes zero and returned as an i32.
; The VLX checks expect a broadcast plus one vpcmpleq into a mask register;
; the NoVLX checks expect a ymm vpcmpgtq/vpxor sequence whose mask is built
; through stack slots.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1296:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1297:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1298:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Put the loaded scalar in lane 0, then splat it with an all-zero shuffle mask.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Widen to 32 lanes: indices 0-3 select the compare lanes, indices 4-7 select
; lanes of the zeroinitializer operand, so every lane above 3 is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked form of the broadcast compare: signed >= of the four i64 lanes of
; %__a against an i64 loaded from %__b and splatted to every lane, AND-ed with
; the low four bits of %__u. The <4 x i1> result is widened to <32 x i1> with
; all added lanes zero and returned as an i32.
; The VLX checks expect a broadcast plus one vpcmpleq under write-mask %k1;
; the NoVLX checks expect the mask bits to be extracted one at a time with
; kshift pairs and merged using vpand.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1299:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1300:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1301:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Put the loaded scalar in lane 0, then splat it with an all-zero shuffle mask.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
; The mask is the first operand of this 'and'; the NoVLX vpand check above
; lists %xmm0 before %xmm1 accordingly.
%4 = and <4 x i1> %extract.i, %2
|
|
; Widen to 32 lanes: indices 0-3 select the masked compare lanes, indices 4-7
; select lanes of the zeroinitializer operand, so every lane above 3 is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Signed >= compare of the four i64 lanes of %__a against those of %__b. The
; <4 x i1> result is widened to <64 x i1> with all added lanes zero and
; returned as an i64.
; The VLX checks expect one vpcmpleq into a mask register read back with
; kmovq; the NoVLX checks expect a ymm vpcmpgtq/vpxor sequence with the 64-bit
; result assembled from 16-bit mask stores to the stack.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1302:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1303:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1304:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; the values are unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Widen to 64 lanes: indices 0-3 select the compare lanes, indices 4-7 select
; lanes of the zeroinitializer operand, so every lane above 3 is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Signed >= compare of the four i64 lanes of %__a against a <4 x i64> vector
; loaded from %__b. The <4 x i1> result is widened to <64 x i1> with all added
; lanes zero and returned as an i64.
; The VLX checks expect the load folded into one vpcmpnltq with a memory
; operand; the NoVLX checks expect a separate vmovdqa load followed by a ymm
; vpcmpgtq/vpxor sequence and stack-assembled result.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1305:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1306:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1307:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
; Same-type bitcast of the loaded vector; the value is unchanged.
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Widen to 64 lanes: indices 0-3 select the compare lanes, indices 4-7 select
; lanes of the zeroinitializer operand, so every lane above 3 is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked signed >= compare of the four i64 lanes of %__a against those of
; %__b, AND-ed with the low four bits of %__u. The <4 x i1> result is widened
; to <64 x i1> with all added lanes zero and returned as an i64.
; The VLX checks expect one vpcmpleq under write-mask %k1; the NoVLX checks
; expect the mask bits to be extracted one at a time with kshift pairs, merged
; using vpand, and the 64-bit result assembled from stack slots.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1308:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1309:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1310:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; the values are unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 64 lanes: indices 0-3 select the masked compare lanes, indices 4-7
; select lanes of the zeroinitializer operand, so every lane above 3 is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked signed >= compare of the four i64 lanes of %__a against a <4 x i64>
; vector loaded from %__b, AND-ed with the low four bits of %__u. The
; <4 x i1> result is widened to <64 x i1> with all added lanes zero and
; returned as an i64.
; The VLX checks expect the load folded into one vpcmpnltq under write-mask
; %k1; the NoVLX checks expect a vmovdqa load, a ymm vpcmpgtq/vpxor sequence,
; per-bit mask extraction with kshift pairs, and a stack-assembled result.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1311:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1312:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1313:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
; Same-type bitcast of the loaded vector; the value is unchanged.
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 64 lanes: indices 0-3 select the masked compare lanes, indices 4-7
; select lanes of the zeroinitializer operand, so every lane above 3 is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed >= compare of the four i64 lanes of %__a against a single i64 loaded
; from %__b and splatted to every lane. The <4 x i1> result is widened to
; <64 x i1> with all added lanes zero and returned as an i64.
; The VLX checks expect a broadcast plus one vpcmpleq into a mask register;
; the NoVLX checks expect a ymm vpcmpgtq/vpxor sequence with the 64-bit result
; assembled from 16-bit mask stores to the stack.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1314:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1315:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1316:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Put the loaded scalar in lane 0, then splat it with an all-zero shuffle mask.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Widen to 64 lanes: indices 0-3 select the compare lanes, indices 4-7 select
; lanes of the zeroinitializer operand, so every lane above 3 is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked form of the broadcast compare: signed >= of the four i64 lanes of
; %__a against an i64 loaded from %__b and splatted to every lane, AND-ed with
; the low four bits of %__u. The <4 x i1> result is widened to <64 x i1> with
; all added lanes zero and returned as an i64.
; The VLX checks expect a broadcast plus one vpcmpleq under write-mask %k1;
; the NoVLX checks expect per-bit mask extraction with kshift pairs, a vpand
; merge, and the 64-bit result assembled from stack slots.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1317:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1318:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1319:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Put the loaded scalar in lane 0, then splat it with an all-zero shuffle mask.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <4 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
; The mask is the first operand of this 'and'; the NoVLX vpand check above
; lists %xmm0 before %xmm1 accordingly.
%4 = and <4 x i1> %extract.i, %2
|
|
; Widen to 64 lanes: indices 0-3 select the masked compare lanes, indices 4-7
; select lanes of the zeroinitializer operand, so every lane above 3 is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Signed >= compare of the eight i64 lanes of %__a against those of %__b. The
; <8 x i1> result is widened to <16 x i1> with the upper eight lanes zero and
; returned as an i16.
; Both check prefixes expect a single zmm vpcmpleq into a mask register; they
; differ only in the kmov form used to read the mask back (kmovd vs kmovw).
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; the values are unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sge <8 x i64> %0, %1
|
|
; Widen to 16 lanes: indices 0-7 select the compare lanes, indices 8-15 select
; lanes of the zeroinitializer operand.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Signed >= compare of the eight i64 lanes of %__a against an <8 x i64> vector
; loaded from %__b. The <8 x i1> result is widened to <16 x i1> with the upper
; eight lanes zero and returned as an i16.
; Both check prefixes expect the load folded into a single zmm vpcmpnltq with
; a memory operand.
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
; Same-type bitcast of the loaded vector; the value is unchanged.
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sge <8 x i64> %0, %1
|
|
; Widen to 16 lanes: indices 0-7 select the compare lanes, indices 8-15 select
; lanes of the zeroinitializer operand.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked signed >= compare of the eight i64 lanes of %__a against those of
; %__b, AND-ed with the eight bits of %__u. The <8 x i1> result is widened to
; <16 x i1> with the upper eight lanes zero and returned as an i16.
; Both check prefixes expect %__u moved into %k1 and a single zmm vpcmpleq
; under that write-mask.
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; the values are unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp sge <8 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes, one per compared element.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Widen to 16 lanes: indices 0-7 select the masked compare lanes, indices 8-15
; select lanes of the zeroinitializer operand.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked signed >= compare of the eight i64 lanes of %__a against an
; <8 x i64> vector loaded from %__b, AND-ed with the eight bits of %__u. The
; <8 x i1> result is widened to <16 x i1> with the upper eight lanes zero and
; returned as an i16.
; Both check prefixes expect %__u moved into %k1 and the load folded into a
; single zmm vpcmpnltq under that write-mask.
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
; Same-type bitcast of the loaded vector; the value is unchanged.
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp sge <8 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes, one per compared element.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Widen to 16 lanes: indices 0-7 select the masked compare lanes, indices 8-15
; select lanes of the zeroinitializer operand.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed >= compare of the eight i64 lanes of %__a against a single i64 loaded
; from %__b and splatted to every lane. The <8 x i1> result is widened to
; <16 x i1> with the upper eight lanes zero and returned as an i16.
; Both check prefixes expect a zmm vpbroadcastq from memory followed by a
; single vpcmpleq into a mask register.
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
|
|
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
|
|
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Put the loaded scalar in lane 0, then splat it with an all-zero shuffle mask.
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <8 x i64> %0, %1
|
|
; Widen to 16 lanes: indices 0-7 select the compare lanes, indices 8-15 select
; lanes of the zeroinitializer operand.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked form of the broadcast compare: signed >= of the eight i64 lanes of
; %__a against an i64 loaded from %__b and splatted to every lane, AND-ed with
; the eight bits of %__u. The <8 x i1> result is widened to <16 x i1> with the
; upper eight lanes zero and returned as an i16.
; Both check prefixes expect a zmm vpbroadcastq from memory, %__u moved into
; %k1, and a single vpcmpleq under that write-mask.
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; the value is unchanged.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Put the loaded scalar in lane 0, then splat it with an all-zero shuffle mask.
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp sge <8 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes, one per compared element.
%3 = bitcast i8 %__u to <8 x i1>
|
|
; The mask is the first operand of this 'and'.
%4 = and <8 x i1> %3, %2
|
|
; Widen to 16 lanes: indices 0-7 select the masked compare lanes, indices 8-15
; select lanes of the zeroinitializer operand.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Signed `>=` between two <8 x i64> register operands with no write-mask.
; The <8 x i1> result is widened to <32 x i1> (every shuffle index >= 8 picks
; a lane of the zeroinitializer operand, so bits 8-31 are zero) and returned
; as an i32.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1320:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1321:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1322:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Signed `>=` on <8 x i64> with no write-mask; the right-hand side is a full
; <8 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <32 x i1> (every shuffle index >= 8 picks a lane of the zeroinitializer
; operand, so bits 8-31 are zero) and returned as an i32.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1323:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1324:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1325:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed `>=` between two <8 x i64> register operands. The compare
; result is ANDed with the bits of %__u reinterpreted as <8 x i1>, widened to
; <32 x i1> (every shuffle index >= 8 picks a lane of the zeroinitializer
; operand, so bits 8-31 are zero) and returned as an i32.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1326:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1327:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1328:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked signed `>=` on <8 x i64>; the right-hand side is a full <8 x i64>
; vector loaded from %__b. The compare result is ANDed with the bits of %__u
; reinterpreted as <8 x i1>, widened to <32 x i1> (every shuffle index >= 8
; picks a lane of the zeroinitializer operand, so bits 8-31 are zero) and
; returned as an i32.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1329:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1330:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1331:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed `>=` on <8 x i64> with no write-mask. The right-hand side is a single
; i64 loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; widened to <32 x i1> (every shuffle index >= 8 picks a lane of the
; zeroinitializer operand, so bits 8-31 are zero) and returned as an i32.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1332:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1333:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1334:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked signed `>=` on <8 x i64>. The right-hand side is a single i64 loaded
; from %__b and splatted to all eight lanes; the compare result is ANDed with
; the bits of %__u reinterpreted as <8 x i1>. The masked result is widened to
; <32 x i1> (every shuffle index >= 8 picks a lane of the zeroinitializer
; operand, so bits 8-31 are zero) and returned as an i32.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1335:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1336:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1337:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Signed `>=` between two <8 x i64> register operands with no write-mask.
; The <8 x i1> result is widened to <64 x i1> (every shuffle index >= 8 picks
; a lane of the zeroinitializer operand, so bits 8-63 are zero) and returned
; as an i64.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1338:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1339:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1340:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Signed `>=` on <8 x i64> with no write-mask; the right-hand side is a full
; <8 x i64> vector loaded from %__b. The <8 x i1> result is widened to
; <64 x i1> (every shuffle index >= 8 picks a lane of the zeroinitializer
; operand, so bits 8-63 are zero) and returned as an i64.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1341:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1342:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1343:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed `>=` between two <8 x i64> register operands. The compare
; result is ANDed with the bits of %__u reinterpreted as <8 x i1>, widened to
; <64 x i1> (every shuffle index >= 8 picks a lane of the zeroinitializer
; operand, so bits 8-63 are zero) and returned as an i64.
; The prefixed assertion comments in the body are autogenerated (see the note
; at the top of the file); regenerate them rather than editing by hand.
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1344:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1345:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1346:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked signed >= compare of eight i64 lanes where the right-hand operand is a
; full <8 x i64> vector loaded from %__b. The 8-bit compare result is ANDed with
; the bits of %__u, widened to 64 lanes with zeros, and returned as an i64.
;
; Expected codegen, per the RUN lines at the top of this file:
;   * VLX prefix (+avx512f,+avx512bw,+avx512vl,+avx512dq): a single masked
;     vpcmpnltq with the load folded in, then kmovq of the k-register to %rax.
;   * NoVLX prefix (+avx512f only): the same masked vpcmpnltq, after which each
;     of the eight result bits is isolated with a kshiftlw/kshiftrw pair, moved
;     to a GPR, inserted into an xmm register with vpinsrb, converted back to a
;     mask (vpmovsxbd/vpslld/vptestmd), spilled to the stack next to zeroed
;     words, and reassembled into %rax with shlq/orq.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1347:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1348:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1349:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  ; Reinterpret the scalar write-mask as one i1 per lane and apply it.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 take the compare bits; indices 8-15 address the zeroinitializer
  ; operand, so result lanes 8-63 are all zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked signed >= compare of eight i64 lanes against a scalar i64 loaded from
; %__b and splatted to all lanes (the "_b" broadcast form). The 8-bit result is
; widened to 64 lanes with zeros and returned as an i64.
;
; Expected codegen, per the RUN lines at the top of this file:
;   * VLX prefix (+avx512f,+avx512bw,+avx512vl,+avx512dq): vpbroadcastq from
;     memory, vpcmpleq with the operands swapped (broadcast <= %__a), then
;     kmovq of the k-register to %rax.
;   * NoVLX prefix (+avx512f only): the same broadcast and compare, after which
;     each of the eight result bits is isolated with a kshiftlw/kshiftrw pair,
;     moved to a GPR, inserted into an xmm register with vpinsrb, converted
;     back to a mask (vpmovsxbd/vpslld/vptestmd), spilled to the stack next to
;     zeroed words, and reassembled into %rax with shlq/orq.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1350:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1351:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1352:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i64> %0, %1
  ; Indices 0-7 take the compare bits; indices 8-15 address the zeroinitializer
  ; operand, so result lanes 8-63 are all zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked signed >= compare of eight i64 lanes against a scalar i64 loaded from
; %__b and splatted to all lanes (the "_b" broadcast form). The 8-bit compare
; result is ANDed with the bits of %__u, widened to 64 lanes with zeros, and
; returned as an i64.
;
; Expected codegen, per the RUN lines at the top of this file:
;   * VLX prefix (+avx512f,+avx512bw,+avx512vl,+avx512dq): vpbroadcastq from
;     memory, a masked vpcmpleq with the operands swapped (broadcast <= %__a),
;     then kmovq of the k-register to %rax.
;   * NoVLX prefix (+avx512f only): the same broadcast and masked compare, after
;     which each of the eight result bits is isolated with a kshiftlw/kshiftrw
;     pair, moved to a GPR, inserted into an xmm register with vpinsrb,
;     converted back to a mask (vpmovsxbd/vpslld/vptestmd), spilled to the
;     stack next to zeroed words, and reassembled into %rax with shlq/orq.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1353:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1354:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1355:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <8 x i64> %0, %1
  ; Reinterpret the scalar write-mask as one i1 per lane and apply it.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 take the masked compare bits; indices 8-15 address the
  ; zeroinitializer operand, so result lanes 8-63 are all zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unmasked unsigned < compare of sixteen i8 lanes, both operands in registers.
; The 16-bit result is widened to 32 lanes with zeros and returned as an i32.
;
; Expected codegen, per the RUN lines at the top of this file:
;   * VLX prefix (+avx512f,+avx512bw,+avx512vl,+avx512dq): a single vpcmpltub
;     into a k-register, then kmovd to %eax.
;   * NoVLX prefix (+avx512f only): both operands are XORed with a vector of
;     128s and compared with the signed vpcmpgtb; the byte result is turned
;     into a mask (vpmovsxbd/vpslld/vptestmd), each of the sixteen bits is
;     isolated with kshiftlw/kshiftrw and moved to a GPR (callee-saved
;     %rbx and %r12-%r15 are pushed for this), the bytes are rebuilt with
;     vmovd/vpinsrb, converted back to a mask, spilled, and loaded into %eax.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1356:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1357:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1358:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1359:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1360:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1361:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1362:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1363:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; The <2 x i64> arguments are viewed as sixteen bytes each.
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  ; Indices 0-15 take the compare bits; indices 16-31 address the
  ; zeroinitializer operand, so the upper sixteen result lanes are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unmasked unsigned < compare of sixteen i8 lanes where the right-hand operand
; is loaded from %__b. The 16-bit result is widened to 32 lanes with zeros and
; returned as an i32.
;
; Expected codegen, per the RUN lines at the top of this file:
;   * VLX prefix (+avx512f,+avx512bw,+avx512vl,+avx512dq): a single vpcmpltub
;     with the load folded in, then kmovd to %eax.
;   * NoVLX prefix (+avx512f only): %__a and the memory operand are XORed with
;     a vector of 128s and compared with the signed vpcmpgtb; the byte result
;     is turned into a mask (vpmovsxbd/vpslld/vptestmd), each of the sixteen
;     bits is isolated with kshiftlw/kshiftrw and moved to a GPR (callee-saved
;     %rbx and %r12-%r15 are pushed for this), the bytes are rebuilt with
;     vmovd/vpinsrb, converted back to a mask, spilled, and loaded into %eax.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1364:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1365:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1366:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1367:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1368:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1369:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1370:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1371:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; %__a and the loaded <2 x i64> are viewed as sixteen bytes each.
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  ; Indices 0-15 take the compare bits; indices 16-31 address the
  ; zeroinitializer operand, so the upper sixteen result lanes are zero.
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked unsigned < compare of sixteen i8 lanes, both operands in registers.
; The 16-bit compare result is ANDed with the bits of %__u, widened to 32 lanes
; with zeros, and returned as an i32.
;
; Expected codegen, per the RUN lines at the top of this file:
;   * VLX prefix (+avx512f,+avx512bw,+avx512vl,+avx512dq): kmovd of %__u into
;     %k1, a single masked vpcmpltub, then kmovd to %eax.
;   * NoVLX prefix (+avx512f only): both operands are XORed with a vector of
;     128s and compared with the signed vpcmpgtb; the write-mask is applied on
;     the first vptestmd; each of the sixteen bits is then isolated with
;     kshiftlw/kshiftrw and moved to a GPR (callee-saved %rbx and %r12-%r15 are
;     pushed for this), the bytes are rebuilt with vmovd/vpinsrb, converted
;     back to a mask, spilled, and loaded into %eax.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1372:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1373:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1374:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1375:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1376:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1377:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1378:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1379:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; The <2 x i64> arguments are viewed as sixteen bytes each.
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  ; Reinterpret the scalar write-mask as one i1 per lane and apply it.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 take the masked compare bits; indices 16-31 address the
  ; zeroinitializer operand, so the upper sixteen result lanes are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked unsigned < compare of sixteen i8 lanes where the right-hand operand is
; loaded from %__b. The 16-bit compare result is ANDed with the bits of %__u,
; widened to 32 lanes with zeros, and returned as an i32.
;
; Expected codegen, per the RUN lines at the top of this file:
;   * VLX prefix (+avx512f,+avx512bw,+avx512vl,+avx512dq): kmovd of %__u into
;     %k1, a single masked vpcmpltub with the load folded in, then kmovd to
;     %eax.
;   * NoVLX prefix (+avx512f only): %__a and the memory operand are XORed with
;     a vector of 128s and compared with the signed vpcmpgtb; the write-mask is
;     applied on the first vptestmd; each of the sixteen bits is then isolated
;     with kshiftlw/kshiftrw and moved to a GPR (callee-saved %rbx and
;     %r12-%r15 are pushed for this), the bytes are rebuilt with vmovd/vpinsrb,
;     converted back to a mask, spilled, and loaded into %eax.
;
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1380:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1381:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1382:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1383:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1384:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1385:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1386:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1387:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; %__a and the loaded <2 x i64> are viewed as sixteen bytes each.
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  ; Reinterpret the scalar write-mask as one i1 per lane and apply it.
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  ; Indices 0-15 take the masked compare bits; indices 16-31 address the
  ; zeroinitializer operand, so the upper sixteen result lanes are zero.
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unsigned byte "less than" on two 128-bit register operands. The 16-lane i1
; result is widened to 64 lanes by a shufflevector whose extra lanes all come
; from the zeroinitializer operand, then returned as an i64 bit mask.
; The run with the VLX prefix expects a single vpcmpltub into a mask register;
; the run with the NoVLX prefix expects the xor-with-128 + vpcmpgtb emulation
; followed by a lane-by-lane rebuild of the mask through general registers.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1388:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1389:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1390:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1391:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1392:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1393:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1394:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1395:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unsigned byte "less than" where the right-hand 128-bit operand is loaded
; from %__b. The 16-lane i1 result is widened to 64 lanes by a shufflevector
; whose extra lanes all come from the zeroinitializer operand, then returned
; as an i64 bit mask.
; The run with the VLX prefix expects the load folded into vpcmpltub; the run
; with the NoVLX prefix expects the load folded into the xor-with-128 step of
; the vpcmpgtb emulation, followed by a lane-by-lane rebuild of the mask.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1396:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1397:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1398:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1399:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1400:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1401:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1402:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1403:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked unsigned byte "less than" on two 128-bit register operands: the
; 16-lane compare result is and-ed with %__u reinterpreted as <16 x i1>,
; widened to 64 lanes with lanes taken from the zeroinitializer operand, and
; returned as an i64 bit mask.
; The run with the VLX prefix expects %__u moved into k1 and used as the write
; mask of vpcmpltub; the run with the NoVLX prefix expects the xor-with-128 +
; vpcmpgtb emulation with k1 applied on the first vptestmd, followed by a
; lane-by-lane rebuild of the mask.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1404:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1405:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1406:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1407:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1408:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1409:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1410:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1411:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked unsigned byte "less than" where the right-hand 128-bit operand is
; loaded from %__b: the 16-lane compare result is and-ed with %__u
; reinterpreted as <16 x i1>, widened to 64 lanes with lanes taken from the
; zeroinitializer operand, and returned as an i64 bit mask.
; The run with the VLX prefix expects the load folded into a k1-masked
; vpcmpltub; the run with the NoVLX prefix expects the load folded into the
; xor-with-128 step of the vpcmpgtb emulation, k1 applied on the first
; vptestmd, and a lane-by-lane rebuild of the mask.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1412:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1413:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1414:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1415:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1416:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1417:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1418:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1419:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp ult <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unsigned byte "less than" on two 256-bit register operands. The 32-lane i1
; result is widened to 64 lanes by a shufflevector whose upper 32 lanes come
; from the zeroinitializer operand, then returned as an i64 bit mask.
; The run with the VLX prefix expects a single ymm vpcmpltub into a mask
; register; the run with the NoVLX prefix expects the xor-with-128 + vpcmpgtb
; emulation, with each 128-bit half converted to a 16-bit mask and stored to
; the stack before the halves are combined.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1420:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1421:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1422:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp ult <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unsigned byte "less than" where the right-hand 256-bit operand is loaded
; from %__b. The 32-lane i1 result is widened to 64 lanes by a shufflevector
; whose upper 32 lanes come from the zeroinitializer operand, then returned as
; an i64 bit mask.
; The run with the VLX prefix expects the load folded into a ymm vpcmpltub;
; the run with the NoVLX prefix expects the load folded into the xor-with-128
; step of the vpcmpgtb emulation, with each 128-bit half converted to a 16-bit
; mask and stored to the stack before the halves are combined.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1423:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1424:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1425:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp ult <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked unsigned byte "less than" on two 256-bit register operands: the
; 32-lane compare result is and-ed with %__u reinterpreted as <32 x i1>,
; widened to 64 lanes with the upper 32 lanes taken from the zeroinitializer
; operand, and returned as an i64 bit mask.
; The run with the VLX prefix expects %__u moved into k1 and used as the write
; mask of a ymm vpcmpltub. The run with the NoVLX prefix expects %__u stored
; to the stack and reloaded as two 16-bit masks, each expanded to a byte
; vector (zero-masked vpternlogd + vpmovdb) and vpand-ed with the matching
; half of the xor-with-128 + vpcmpgtb result.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1426:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1427:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1428:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp ult <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Unsigned less-than compare of the 32 bytes of %__a against the 32 bytes
; loaded from %__b, ANDed lane-by-lane with the bits of the i32 mask %__u.
; The <32 x i1> result is widened to <64 x i1> (lanes 32-63 select from the
; zeroinitializer operand) and returned as an i64.
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1429:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1430:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1431:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm3
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp ult <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a and %__b. The
; <8 x i1> result is widened to <16 x i1> (lanes 8-15 select from the
; zeroinitializer operand) and returned as an i16.
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a against the
; eight i16 lanes loaded from %__b. The <8 x i1> result is widened to
; <16 x i1> (lanes 8-15 select from the zeroinitializer operand) and
; returned as an i16.
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a and %__b, ANDed
; lane-by-lane with the bits of the i8 mask %__u. The <8 x i1> result is
; widened to <16 x i1> (lanes 8-15 select from the zeroinitializer operand)
; and returned as an i16.
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a against the
; eight i16 lanes loaded from %__b, ANDed lane-by-lane with the bits of the
; i8 mask %__u. The <8 x i1> result is widened to <16 x i1> (lanes 8-15
; select from the zeroinitializer operand) and returned as an i16.
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a and %__b. The
; <8 x i1> result is widened to <32 x i1>; every shuffle index for lanes
; 8-31 is in the range 8-15, i.e. selects from the zeroinitializer operand.
; The widened mask is returned as an i32.
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1432:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1433:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1434:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a against the
; eight i16 lanes loaded from %__b. The <8 x i1> result is widened to
; <32 x i1>; every shuffle index for lanes 8-31 is in the range 8-15, i.e.
; selects from the zeroinitializer operand. The widened mask is returned as
; an i32.
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1435:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1436:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1437:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a and %__b, ANDed
; lane-by-lane with the bits of the i8 mask %__u. The <8 x i1> result is
; widened to <32 x i1>; every shuffle index for lanes 8-31 is in the range
; 8-15, i.e. selects from the zeroinitializer operand. The widened mask is
; returned as an i32.
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1438:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1439:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1440:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a against the
; eight i16 lanes loaded from %__b, ANDed lane-by-lane with the bits of the
; i8 mask %__u. The <8 x i1> result is widened to <32 x i1>; every shuffle
; index for lanes 8-31 is in the range 8-15, i.e. selects from the
; zeroinitializer operand. The widened mask is returned as an i32.
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1441:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1442:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1443:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a and %__b. The
; <8 x i1> result is widened to <64 x i1>; every shuffle index for lanes
; 8-63 is in the range 8-15, i.e. selects from the zeroinitializer operand.
; The widened mask is returned as an i64.
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1444:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1445:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1446:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unsigned less-than compare of the eight i16 lanes of %__a against the
; eight i16 lanes loaded from %__b. The <8 x i1> result is widened to
; <64 x i1>; every shuffle index for lanes 8-63 is in the range 8-15, i.e.
; selects from the zeroinitializer operand. The widened mask is returned as
; an i64.
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1447:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1448:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1449:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp ult <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked unsigned less-than compare of eight i16 lanes, result returned as i64.
; The IR compares %__a and %__b as <8 x i16> with icmp ult, ANDs the result
; with the i8 mask %__u, widens it to 64 mask lanes and bitcasts that to i64.
; Expected code: the VLX prefix wants one masked vpcmpltuw plus kmovq; the
; NoVLX prefix wants the longer expanded sequence listed below. The assertions
; are autogenerated (see the NOTE on the first line of this file), so
; regenerate them with the script instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1450:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1451:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1452:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%1 = bitcast <2 x i64> %__b to <8 x i16>
|
|
%2 = icmp ult <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 pick the masked compare bits from %4; indices 8-15 pick
; elements of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Memory-operand variant of the masked eight-lane i16 unsigned less-than test.
; The right-hand operand is loaded from the pointer %__b; the icmp ult result
; is ANDed with the i8 mask %__u, widened to 64 mask lanes and returned as i64.
; Expected code: the VLX prefix wants a masked vpcmpltuw taking (%rsi) directly
; as its memory operand; the NoVLX prefix wants the expanded sequence below.
; The assertions are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with the script instead of editing them by hand.
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1453:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1454:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1455:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <8 x i16>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <8 x i16>
|
|
%2 = icmp ult <8 x i16> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 pick the masked compare bits from %4; indices 8-15 pick
; elements of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked unsigned less-than compare of sixteen i16 lanes, returned as i32.
; The IR compares %__a and %__b as <16 x i16> with icmp ult, widens the
; 16-lane result to 32 mask lanes and bitcasts that to i32.
; Expected code: the VLX prefix wants one vpcmpltuw on ymm registers plus
; kmovd; the NoVLX prefix wants the expanded sequence listed below. The
; assertions are autogenerated (see the NOTE on the first line of this file),
; so regenerate them with the script instead of editing them by hand.
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1456:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1457:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1458:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1459:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1460:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1461:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1462:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1463:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp ult <16 x i16> %0, %1
|
|
; Shuffle indices 0-15 pick the compare bits from %2; indices 16-31 pick
; elements of the zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Memory-operand variant of the unmasked sixteen-lane i16 unsigned less-than
; test. The right-hand operand is loaded from the pointer %__b; the icmp ult
; result is widened to 32 mask lanes and returned as i32.
; Expected code: the VLX prefix wants a vpcmpltuw taking (%rdi) directly as its
; memory operand; the NoVLX prefix wants the expanded sequence listed below.
; The assertions are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with the script instead of editing them by hand.
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1464:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1465:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1466:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1467:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1468:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1469:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1470:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1471:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp ult <16 x i16> %0, %1
|
|
; Shuffle indices 0-15 pick the compare bits from %2; indices 16-31 pick
; elements of the zeroinitializer operand, so result lanes 16-31 are all zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of sixteen i16 lanes, returned as i32.
; The IR compares %__a and %__b as <16 x i16> with icmp ult, ANDs the result
; with the i16 mask %__u, widens it to 32 mask lanes and bitcasts that to i32.
; Expected code: the VLX prefix wants one masked vpcmpltuw on ymm registers
; plus kmovd; the NoVLX prefix wants the expanded sequence listed below. The
; assertions are autogenerated (see the NOTE on the first line of this file),
; so regenerate them with the script instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1472:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1473:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1474:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1475:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1476:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1477:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1478:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1479:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp ult <16 x i16> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 0-15 pick the masked compare bits from %4; indices 16-31 pick
; elements of the zeroinitializer operand, so result lanes 16-31 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Memory-operand variant of the masked sixteen-lane i16 unsigned less-than
; test. The right-hand operand is loaded from the pointer %__b; the icmp ult
; result is ANDed with the i16 mask %__u, widened to 32 mask lanes and
; returned as i32.
; Expected code: the VLX prefix wants a masked vpcmpltuw taking (%rsi) directly
; as its memory operand; the NoVLX prefix wants the expanded sequence below.
; The assertions are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with the script instead of editing them by hand.
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1480:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1481:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1482:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1483:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1484:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1485:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1486:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1487:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <16 x i16>
|
|
%2 = icmp ult <16 x i16> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 0-15 pick the masked compare bits from %4; indices 16-31 pick
; elements of the zeroinitializer operand, so result lanes 16-31 are all zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1488:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1489:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1490:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1491:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1492:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1493:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1494:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1495:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <16 x i16>
|
|
%1 = bitcast <4 x i64> %__b to <16 x i16>
|
|
%2 = icmp ult <16 x i16> %0, %1
|
|
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unsigned less-than compare of sixteen i16 lanes, second operand loaded from
; memory, with the 16-bit result mask widened to a 64-bit integer.
;
; IR shape: %__a and the value loaded from %__b are reinterpreted as
; <16 x i16>, compared with `icmp ult`, and the <16 x i1> result is padded to
; <64 x i1> by a shufflevector whose indices 16..31 select lanes of the
; zeroinitializer operand, so result bits 16..63 are zero.
;
; Expected code for the run with +avx512bw,+avx512vl (prefix VLX) is a single
; vpcmpltuw with a folded (%rdi) memory operand followed by kmovq.
; Expected code for the +avx512f-only run (prefix NoVLX) biases both operands
; by 32768 with vpxor, uses the signed vpcmpgtw, and rebuilds the mask lane by
; lane through general-purpose registers before combining the two 32-bit
; halves on the stack.
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1496:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1497:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1498:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1499:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1500:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1501:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1502:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1503:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp ult <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked unsigned less-than compare of sixteen i16 lanes (register operands),
; with the 16-bit result mask widened to a 64-bit integer.
;
; IR shape: %__a and %__b are reinterpreted as <16 x i16> and compared with
; `icmp ult`; the i16 argument %__u is bitcast to <16 x i1> and ANDed with the
; compare result. The <16 x i1> value is then padded to <64 x i1> by a
; shufflevector whose indices 16..31 select lanes of the zeroinitializer
; operand, so result bits 16..63 are zero.
;
; Expected code for the run with +avx512bw,+avx512vl (prefix VLX) moves %edi
; into %k1 and issues one vpcmpltuw predicated on {%k1}, then kmovq.
; Expected code for the +avx512f-only run (prefix NoVLX) biases both operands
; by 32768 with vpxor, uses the signed vpcmpgtw, applies the write mask on the
; vptestmd that converts the vector result to a mask register, and rebuilds
; the mask lane by lane through general-purpose registers.
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1504:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1505:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1506:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1507:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1508:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1509:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1510:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1511:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp ult <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked unsigned less-than compare of sixteen i16 lanes, second operand
; loaded from memory, with the 16-bit result mask widened to a 64-bit integer.
;
; IR shape: %__a and the value loaded from %__b are reinterpreted as
; <16 x i16> and compared with `icmp ult`; the i16 argument %__u is bitcast to
; <16 x i1> and ANDed with the compare result. The <16 x i1> value is then
; padded to <64 x i1> by a shufflevector whose indices 16..31 select lanes of
; the zeroinitializer operand, so result bits 16..63 are zero.
;
; Expected code for the run with +avx512bw,+avx512vl (prefix VLX) moves %edi
; into %k1 and issues one vpcmpltuw with a folded (%rsi) memory operand,
; predicated on {%k1}, then kmovq.
; Expected code for the +avx512f-only run (prefix NoVLX) biases both operands
; by 32768 with vpxor (folding the load into the second vpxor), uses the
; signed vpcmpgtw, applies the write mask on the vptestmd that converts the
; vector result to a mask register, and rebuilds the mask lane by lane
; through general-purpose registers.
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1512:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1513:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1514:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1515:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1516:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1517:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1518:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1519:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp ult <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1520:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1521:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1522:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm3, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm2, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm7, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm6, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm8, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4
|
|
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
|
|
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
|
|
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
|
|
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%1 = bitcast <8 x i64> %__b to <32 x i16>
|
|
%2 = icmp ult <32 x i16> %0, %1
|
|
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unsigned less-than compare of 32 x i16 lanes, with the right-hand operand
; loaded from memory, returned as a zero-extended 64-bit mask.
; %__a and the vector loaded through %__b are reinterpreted as <32 x i16>; the
; <32 x i1> compare result is widened to <64 x i1> with zeros in the upper 32
; lanes and bitcast to the i64 return value.
; The assertion lines below are autogenerated by utils/update_llc_test_checks.py
; (see the note at the top of the file); regenerate them with that script
; rather than editing them by hand.
; The run with +avx512bw,+avx512vl expects a single vpcmpltuw with a memory
; operand. The +avx512f-only run expects the compare to be done as two 256-bit
; halves (vpxor with a 32768 splat, then vpcmpgtw) and each 16-bit mask to be
; rebuilt one bit at a time before being stored to the stack.
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1523:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1524:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1525:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm2, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm1
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3
|
|
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; View the first operand as 32 x i16 lanes.
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
; The second operand comes from memory and is viewed the same way.
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <32 x i16>
|
|
; Per-lane unsigned less-than: one i1 per i16 lane.
%2 = icmp ult <32 x i16> %0, %1
|
|
; Widen 32 -> 64 mask bits: indices 0-31 pick the compare result, indices
; 32-63 pick lanes of the zeroinitializer operand, so the upper half is zero.
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
; Pack the 64 mask bits into the scalar return value.
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of 32 x i16 lanes, returned as a
; zero-extended 64-bit mask.
; %__a and %__b are reinterpreted as <32 x i16> and compared with icmp ult;
; %__u is reinterpreted as <32 x i1> and ANDed with the compare result. The
; <32 x i1> value is then widened to <64 x i1> with zeros in the upper 32 lanes
; and bitcast to the i64 return value.
; The assertion lines below are autogenerated by utils/update_llc_test_checks.py
; (see the note at the top of the file); regenerate them with that script
; rather than editing them by hand.
; The run with +avx512bw,+avx512vl expects %edi to be moved into %k1 and used
; as the write mask of a single vpcmpltuw. The +avx512f-only run expects %edi
; to be spilled to the stack and reloaded as two 16-bit masks, each expanded to
; a byte vector (vpternlogd + vpmovdb) and combined via vpand with the
; bit-by-bit rebuilt result of each 256-bit compare half.
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1526:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1527:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1528:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm2, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm3
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vmovq %xmm3, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm4
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm6, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vmovq %xmm7, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm5, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vmovq %xmm8, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
|
|
; NoVLX-NEXT: vmovq %xmm1, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm4
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm5, %ymm6, %ymm3
|
|
; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
|
|
; NoVLX-NEXT: vpxor %ymm5, %ymm8, %ymm2
|
|
; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4
|
|
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm2
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm2, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; View both vector operands as 32 x i16 lanes.
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%1 = bitcast <8 x i64> %__b to <32 x i16>
|
|
; Per-lane unsigned less-than: one i1 per i16 lane.
%2 = icmp ult <32 x i16> %0, %1
|
|
; Treat the 32-bit scalar %__u as one mask bit per lane and apply it.
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
; Widen 32 -> 64 mask bits: indices 0-31 pick the masked compare result,
; indices 32-63 pick lanes of the zeroinitializer operand, so the upper half is zero.
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
; Pack the 64 mask bits into the scalar return value.
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1529:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1530:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1531:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm1, %rax
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: movq %rax, %rdx
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
|
|
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
|
|
; NoVLX-NEXT: shrq $32, %rdx
|
|
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm4, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm3, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vmovq %xmm0, %rcx
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5
|
|
; NoVLX-NEXT: movl %ecx, %eax
|
|
; NoVLX-NEXT: shrl $16, %eax
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: movq %rcx, %rax
|
|
; NoVLX-NEXT: shrq $32, %rax
|
|
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
|
|
; NoVLX-NEXT: shrq $48, %rcx
|
|
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
|
|
; NoVLX-NEXT: movl %eax, %ecx
|
|
; NoVLX-NEXT: shrl $16, %ecx
|
|
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: movq %rax, %rcx
|
|
; NoVLX-NEXT: shrq $32, %rcx
|
|
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm6
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
|
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
|
|
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
|
|
; NoVLX-NEXT: shrq $48, %rax
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm3
|
|
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm2
|
|
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
|
|
; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
|
|
; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm5
|
|
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm5, %ymm2
|
|
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
|
|
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %eax, %xmm2
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
|
|
; NoVLX-NEXT: vpxor 32(%rsi), %ymm4, %ymm4
|
|
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
|
|
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
|
|
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm3
|
|
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %ecx
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: shlq $32, %rax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <32 x i16>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <32 x i16>
|
|
%2 = icmp ult <32 x i16> %0, %1
|
|
%3 = bitcast i32 %__u to <32 x i1>
|
|
%4 = and <32 x i1> %2, %3
|
|
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unmasked unsigned less-than compare of two 128-bit vectors viewed as
; <4 x i32>. The <4 x i1> result is widened to <8 x i1> with zero upper lanes
; and returned as an i8 bitmask.
; Expected code under the VLX prefix is a single vpcmpltud into a mask
; register. Under the NoVLX prefix the expected lowering XORs both operands
; with 0x80000000, uses the signed vpcmpgtd, and then rebuilds the mask one
; lane at a time (bytes 0, 4, 8 and 12 of the compare result).
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; View both 128-bit arguments as four 32-bit lanes.
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
; Lane-wise unsigned less-than; yields one i1 per lane.
%2 = icmp ult <4 x i32> %0, %1
|
|
; Widen to eight lanes: indices 4-7 select from the zeroinitializer operand,
; so the upper four bits of the result are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
; Pack the eight i1 lanes into the returned i8.
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Memory-operand variant of the unmasked <4 x i32> unsigned less-than test:
; the right-hand operand is loaded through %__b. The <4 x i1> result is
; widened to <8 x i1> with zero upper lanes and returned as an i8 bitmask.
; Under the VLX prefix the load is expected to fold into vpcmpltud. Under the
; NoVLX prefix it is expected to fold into the vpxor that applies the
; 0x80000000 bias before the signed vpcmpgtd.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Right-hand operand comes from memory rather than a register argument.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
; Lane-wise unsigned less-than; yields one i1 per lane.
%2 = icmp ult <4 x i32> %0, %1
|
|
; Widen to eight lanes: indices 4-7 select from the zeroinitializer operand,
; so the upper four bits of the result are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of two 128-bit vectors viewed as
; <4 x i32>. %__u is reinterpreted as <8 x i1>; its low four lanes are ANDed
; with the compare result. The <4 x i1> value is then widened to <8 x i1>
; with zero upper lanes and returned as an i8 bitmask.
; Under the VLX prefix %__u is expected to be moved into %k1 and used as the
; write mask of vpcmpltud. Under the NoVLX prefix the four mask bits are
; expected to be isolated with kshift pairs, inserted at bytes 0, 4, 8 and 12
; of an xmm register, and combined with the biased vpcmpgtd result by vpand.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
; Lane-wise unsigned less-than; yields one i1 per lane.
%2 = icmp ult <4 x i32> %0, %1
|
|
; Treat the scalar mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
; Apply the mask to the compare result.
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to eight lanes: indices 4-7 select from the zeroinitializer operand,
; so the upper four bits of the result are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked, memory-operand variant of the <4 x i32> unsigned less-than test.
; The right-hand operand is loaded through %__b, and the low four lanes of
; %__u (viewed as <8 x i1>) are ANDed with the compare result. The <4 x i1>
; value is widened to <8 x i1> with zero upper lanes and returned as i8.
; Under the VLX prefix a masked vpcmpltud with a folded load from (%rsi) is
; expected. Under the NoVLX prefix the load is expected to fold into the
; biasing vpxor, and the mask bits are rebuilt in an xmm register and applied
; with vpand.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Right-hand operand comes from memory rather than a register argument.
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
; Lane-wise unsigned less-than; yields one i1 per lane.
%2 = icmp ult <4 x i32> %0, %1
|
|
; Treat the scalar mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
; Apply the mask to the compare result.
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to eight lanes: indices 4-7 select from the zeroinitializer operand,
; so the upper four bits of the result are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Broadcast-operand variant of the unmasked <4 x i32> unsigned less-than
; test: a single i32 is loaded through %__b and splatted to all four lanes
; before the compare. The <4 x i1> result is widened to <8 x i1> with zero
; upper lanes and returned as an i8 bitmask.
; Under the VLX prefix the splat is expected to become an embedded {1to4}
; broadcast on vpcmpltud. Under the NoVLX prefix a separate vpbroadcastd
; from (%rdi) is expected ahead of the bias-and-vpcmpgtd sequence.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
; Lane-wise unsigned less-than against the splatted value.
%2 = icmp ult <4 x i32> %0, %1
|
|
; Widen to eight lanes: indices 4-7 select from the zeroinitializer operand,
; so the upper four bits of the result are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked, broadcast-operand variant of the <4 x i32> unsigned less-than test.
; A single i32 loaded through %__b is splatted to four lanes and compared
; against %__a; the low four lanes of %__u (viewed as <8 x i1>) are ANDed
; with the result. The <4 x i1> value is widened to <8 x i1> with zero upper
; lanes and returned as i8.
; Under the VLX prefix a masked vpcmpltud with an embedded {1to4} broadcast
; from (%rsi) is expected. Under the NoVLX prefix a separate vpbroadcastd,
; the bias-and-vpcmpgtd sequence, and a vpand with the rebuilt mask vector
; are expected.
; Note: the IR `and` here takes the mask as its first operand, unlike the
; non-broadcast masked tests in this group, and the expected vpand operand
; order differs correspondingly.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load i32, i32* %__b
|
|
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
; Lane-wise unsigned less-than against the splatted value.
%2 = icmp ult <4 x i32> %0, %1
|
|
; Treat the scalar mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
; Apply the mask to the compare result (mask is the first operand here).
%4 = and <4 x i1> %extract.i, %2
|
|
; Widen to eight lanes: indices 4-7 select from the zeroinitializer operand,
; so the upper four bits of the result are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unmasked unsigned less-than compare of two 128-bit vectors viewed as
; <4 x i32>, with the <4 x i1> result widened to <16 x i1> and returned as an
; i16 bitmask whose upper twelve bits are zero.
; Under the VLX prefix a single vpcmpltud into a mask register is expected.
; Under the NoVLX prefix the expected lowering biases both operands by
; 0x80000000, uses the signed vpcmpgtd, and rebuilds the mask lane by lane
; with dword-granular operations (vpternlogd / vpermi2d / vpslld $31 /
; vptestmd); lane 0 is isolated with `andl $1` rather than a kshift pair.
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; View both 128-bit arguments as four 32-bit lanes.
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
; Lane-wise unsigned less-than; yields one i1 per lane.
%2 = icmp ult <4 x i32> %0, %1
|
|
; Widen to sixteen lanes. Indices 0-3 take the compare result; every later
; index is in the range 4-7, which selects from the zeroinitializer operand,
; so lanes 4-15 are all zero (the 4-7 pattern is simply repeated).
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
; Pack the sixteen i1 lanes into the returned i16.
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unsigned less-than compare of %__a against a <4 x i32> vector loaded from
; %__b. The <4 x i1> result is widened to <16 x i1> (lanes 4-15 are taken from
; the zeroinitializer operand of the shuffle) and returned as an i16 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked unsigned less-than compare of two <4 x i32> register operands. The
; low four bits of %__u are and'ed with the <4 x i1> compare result, which is
; then widened to <16 x i1> (lanes 4-15 are taken from the zeroinitializer
; operand of the shuffle) and returned as an i16 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a <4 x i32> vector loaded
; from %__b. The low four bits of %__u are and'ed with the <4 x i1> compare
; result, which is then widened to <16 x i1> (lanes 4-15 are taken from the
; zeroinitializer operand of the shuffle) and returned as an i16 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of %__a against a scalar i32 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <16 x i1>
; (lanes 4-15 are taken from the zeroinitializer operand of the shuffle) and
; returned as an i16 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a scalar i32 loaded from
; %__b and splatted to all four lanes. The low four bits of %__u are and'ed
; with the <4 x i1> compare result, which is then widened to <16 x i1> (lanes
; 4-15 are taken from the zeroinitializer operand of the shuffle) and returned
; as an i16 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of two <4 x i32> register operands. The <4 x i1>
; result is widened to <32 x i1> (lanes 4-31 are taken from the zeroinitializer
; operand of the shuffle) and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1532:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1533:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1534:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unsigned less-than compare of %__a against a <4 x i32> vector loaded from
; %__b. The <4 x i1> result is widened to <32 x i1> (lanes 4-31 are taken from
; the zeroinitializer operand of the shuffle) and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1535:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1536:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1537:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked unsigned less-than compare of two <4 x i32> register operands. The
; low four bits of %__u are and'ed with the <4 x i1> compare result, which is
; then widened to <32 x i1> (lanes 4-31 are taken from the zeroinitializer
; operand of the shuffle) and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1538:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1539:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1540:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a <4 x i32> vector loaded
; from %__b. The low four bits of %__u are and'ed with the <4 x i1> compare
; result, which is then widened to <32 x i1> (lanes 4-31 are taken from the
; zeroinitializer operand of the shuffle) and returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1541:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1542:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1543:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of %__a against a scalar i32 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is widened to <32 x i1>
; (lanes 4-31 are taken from the zeroinitializer operand of the shuffle) and
; returned as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1544:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1545:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1546:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a scalar i32 loaded from
; %__b and splatted to all four lanes. The low four bits of %__u are and'ed
; with the <4 x i1> compare result, which is then widened to <32 x i1> (lanes
; 4-31 are taken from the zeroinitializer operand of the shuffle) and returned
; as an i32 bitmask.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1547:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1548:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1549:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unsigned less-than (icmp ult) on the four i32 lanes of %__a and %__b, both
; passed by value. The 4-lane result is padded with zero lanes to 64 mask bits
; and returned as i64.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1550:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1551:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1552:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp ult <4 x i32> %0, %1
|
|
; Indices 4-7 select lanes of the zeroinitializer operand, so every result
; lane above 3 is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unsigned less-than (icmp ult) on the four i32 lanes of %__a against a full
; vector loaded from %__b. The 4-lane result is padded with zero lanes to 64
; mask bits and returned as i64.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1553:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1554:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1555:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp ult <4 x i32> %0, %1
|
|
; Indices 4-7 select lanes of the zeroinitializer operand, so every result
; lane above 3 is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Mask-gated unsigned less-than (icmp ult) on the four i32 lanes of %__a and
; %__b, both passed by value. The low four bits of %__u gate the compare; the
; 4-lane result is padded with zero lanes to 64 mask bits and returned as i64.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1556:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1557:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1558:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %__b to <4 x i32>
|
|
%2 = icmp ult <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only the low four bits of %__u as the per-lane mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Indices 4-7 select lanes of the zeroinitializer operand, so every result
; lane above 3 is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Mask-gated unsigned less-than (icmp ult) on the four i32 lanes of %__a
; against a full vector loaded from %__b. The low four bits of %__u gate the
; compare; the 4-lane result is padded with zero lanes to 64 mask bits and
; returned as i64.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1559:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1560:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1561:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x i32>
|
|
%2 = icmp ult <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only the low four bits of %__u as the per-lane mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Indices 4-7 select lanes of the zeroinitializer operand, so every result
; lane above 3 is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than (icmp ult) on the four i32 lanes of %__a. The right-hand
; side is a single i32 loaded from %__b and splatted to all lanes. The 4-lane
; result is padded with zero lanes to 64 mask bits and returned as i64.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1562:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1563:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1564:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar across all four lanes.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <4 x i32> %0, %1
|
|
; Indices 4-7 select lanes of the zeroinitializer operand, so every result
; lane above 3 is zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Mask-gated unsigned less-than (icmp ult) on the four i32 lanes of %__a. The
; right-hand side is a single i32 loaded from %__b and splatted to all lanes.
; The low four bits of %__u gate the compare; the 4-lane result is padded with
; zero lanes to 64 mask bits and returned as i64.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1565:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1566:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1567:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
|
|
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar across all four lanes.
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <4 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only the low four bits of %__u as the per-lane mask.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Indices 4-7 select lanes of the zeroinitializer operand, so every result
; lane above 3 is zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than (icmp ult) on the eight i32 lanes of %__a and %__b, both
; passed by value. The 8-lane result is padded with zero lanes to 16 mask bits
; and returned as i16.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8 and above are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unsigned less-than (icmp ult) on the eight i32 lanes of %__a against a full
; vector loaded from %__b. The 8-lane result is padded with zero lanes to 16
; mask bits and returned as i16.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8 and above are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Mask-gated unsigned less-than (icmp ult) on the eight i32 lanes of %__a and
; %__b, both passed by value. All eight bits of %__u gate the compare; the
; 8-lane result is padded with zero lanes to 16 mask bits and returned as i16.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8 and above are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Mask-gated unsigned less-than (icmp ult) on the eight i32 lanes of %__a
; against a full vector loaded from %__b. All eight bits of %__u gate the
; compare; the 8-lane result is padded with zero lanes to 16 mask bits and
; returned as i16.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8 and above are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than (icmp ult) on the eight i32 lanes of %__a. The right-hand
; side is a single i32 loaded from %__b and splatted to all lanes. The 8-lane
; result is padded with zero lanes to 16 mask bits and returned as i16.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar across all eight lanes.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8 and above are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Mask-gated unsigned less-than (icmp ult) on the eight i32 lanes of %__a. The
; right-hand side is a single i32 loaded from %__b and splatted to all lanes.
; All eight bits of %__u gate the compare; the 8-lane result is padded with
; zero lanes to 16 mask bits and returned as i16.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
; Splat the loaded scalar across all eight lanes.
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8 and above are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than (icmp ult) on the eight i32 lanes of %__a and %__b, both
; passed by value. The 8-lane result is padded with zero lanes to 32 mask bits
; and returned as i32.
; The assertion comments below are autogenerated (see the NOTE at the top of
; the file).
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1568:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1569:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1570:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Indices 8-15 select lanes of the zeroinitializer operand, so result lanes
; 8 and above are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unsigned less-than compare (icmp ult) of %__a against a <4 x i64> vector
; loaded from %__b, both reinterpreted as <8 x i32>. The <8 x i1> result is
; widened to <32 x i1> with zero upper lanes and returned as an i32 bitmask.
; Expected code when AVX512VL is enabled: one ymm vpcmpltud with a memory
; operand followed by kmovd. Expected code with AVX512F only: the operand is
; loaded into ymm1, the compare runs on zmm registers, and the eight mask bits
; are extracted one at a time and reassembled through the stack.
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1571:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1572:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1573:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Result lanes 0-7 are the compare bits; shuffle indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-31 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare (icmp ult) of %__a and %__b, both
; reinterpreted as <8 x i32>. The <8 x i1> compare result is ANDed with the
; bits of %__u, widened to <32 x i1> with zero upper lanes, and returned as an
; i32 bitmask.
; Expected code when AVX512VL is enabled: %edi is moved into %k1 and used as
; the write mask of a single ymm vpcmpltud. Expected code with AVX512F only:
; the masked compare runs on zmm registers and the eight mask bits are
; extracted one at a time and reassembled through the stack.
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1574:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1575:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1576:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Result lanes 0-7 are the masked compare bits; shuffle indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-31 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare (icmp ult) of %__a against a <4 x i64>
; vector loaded from %__b, both reinterpreted as <8 x i32>. The <8 x i1>
; compare result is ANDed with the bits of %__u, widened to <32 x i1> with
; zero upper lanes, and returned as an i32 bitmask.
; Expected code when AVX512VL is enabled: %edi is moved into %k1 and used as
; the write mask of a single ymm vpcmpltud with a memory operand. Expected
; code with AVX512F only: the operand is loaded into ymm1, the masked compare
; runs on zmm registers, and the eight mask bits are extracted one at a time
; and reassembled through the stack.
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1577:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1578:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1579:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Result lanes 0-7 are the masked compare bits; shuffle indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-31 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare (icmp ult) of %__a, reinterpreted as <8 x i32>,
; against a single i32 loaded from %__b and splatted to all eight lanes. The
; <8 x i1> result is widened to <32 x i1> with zero upper lanes and returned
; as an i32 bitmask.
; Expected code when AVX512VL is enabled: one ymm vpcmpltud using an embedded
; {1to8} broadcast memory operand. Expected code with AVX512F only: an
; explicit vpbroadcastd into ymm1, a compare on zmm registers, and the eight
; mask bits extracted one at a time and reassembled through the stack.
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1580:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1581:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1582:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
; Splat: every shuffle index is 0, so all eight lanes hold the loaded scalar.
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Result lanes 0-7 are the compare bits; shuffle indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-31 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare (icmp ult) of %__a, reinterpreted as
; <8 x i32>, against a single i32 loaded from %__b and splatted to all eight
; lanes. The <8 x i1> compare result is ANDed with the bits of %__u, widened
; to <32 x i1> with zero upper lanes, and returned as an i32 bitmask.
; Expected code when AVX512VL is enabled: %edi is moved into %k1 and used as
; the write mask of a single ymm vpcmpltud with an embedded {1to8} broadcast
; memory operand. Expected code with AVX512F only: an explicit vpbroadcastd
; into ymm1, a masked compare on zmm registers, and the eight mask bits
; extracted one at a time and reassembled through the stack.
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1583:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1584:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1585:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
|
|
; Splat: every shuffle index is 0, so all eight lanes hold the loaded scalar.
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
; Result lanes 0-7 are the masked compare bits; shuffle indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-31 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare (icmp ult) of %__a and %__b, both reinterpreted
; as <8 x i32>. The <8 x i1> result is widened to <64 x i1> with zero upper
; lanes and returned as an i64 bitmask.
; Expected code when AVX512VL is enabled: one ymm vpcmpltud followed by kmovq.
; Expected code with AVX512F only: the compare runs on zmm registers, the
; eight mask bits are extracted one at a time and reassembled through the
; stack, and the i64 result is formed from two 32-bit stack loads combined
; with shlq $32 and orq.
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1586:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1587:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1588:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Result lanes 0-7 are the compare bits; shuffle indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unsigned less-than compare (icmp ult) of %__a against a <4 x i64> vector
; loaded from %__b, both reinterpreted as <8 x i32>. The <8 x i1> result is
; widened to <64 x i1> with zero upper lanes and returned as an i64 bitmask.
; Expected code when AVX512VL is enabled: one ymm vpcmpltud with a memory
; operand followed by kmovq. Expected code with AVX512F only: the operand is
; loaded into ymm1, the compare runs on zmm registers, the eight mask bits are
; extracted one at a time and reassembled through the stack, and the i64
; result is formed from two 32-bit stack loads combined with shlq $32 and orq.
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1589:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1590:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1591:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
; Result lanes 0-7 are the compare bits; shuffle indices 8-15 select lanes of
; the zeroinitializer operand, so result lanes 8-63 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare (icmp ult) of %__a and %__b, both
; reinterpreted as <8 x i32>. The <8 x i1> compare result is ANDed with the
; bits of %__u, widened to <64 x i1> with zero upper lanes, and returned as an
; i64 bitmask.
; Expected code when AVX512VL is enabled: %edi is moved into %k1 and used as
; the write mask of a single ymm vpcmpltud, followed by kmovq. Expected code
; with AVX512F only: the masked compare runs on zmm registers, the eight mask
; bits are extracted one at a time and reassembled through the stack, and the
; i64 result is formed from two 32-bit stack loads combined with shlq $32 and
; orq.
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1592:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1593:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1594:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%1 = bitcast <4 x i64> %__b to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Result lanes 0-7 are the masked compare bits; shuffle indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare (icmp ult) of %__a against a <4 x i64>
; vector loaded from %__b, both reinterpreted as <8 x i32>. The <8 x i1>
; compare result is ANDed with the bits of %__u, widened to <64 x i1> with
; zero upper lanes, and returned as an i64 bitmask.
; Expected code when AVX512VL is enabled: %edi is moved into %k1 and used as
; the write mask of a single ymm vpcmpltud with a memory operand, followed by
; kmovq. Expected code with AVX512F only: the operand is loaded into ymm1, the
; masked compare runs on zmm registers, the eight mask bits are extracted one
; at a time and reassembled through the stack, and the i64 result is formed
; from two 32-bit stack loads combined with shlq $32 and orq.
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1595:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1596:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1597:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x i32>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x i32>
|
|
%2 = icmp ult <8 x i32> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Result lanes 0-7 are the masked compare bits; shuffle indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-63 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of the <8 x i32> view of %__a against the i32
; loaded from %__b and splatted to all eight lanes. The resulting <8 x i1>
; mask is widened to 64 bits by a shufflevector whose second operand is
; zeroinitializer (every index >= 8 selects a zero lane), then returned as i64.
; The VLX run expects a single embedded-broadcast compare plus kmovq; the
; NoVLX run expects a zmm compare whose mask bits are extracted one at a
; time, re-inserted with vpinsrb, and reassembled through stack slots.
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1598:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1599:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1600:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i32> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked variant of the broadcast compare: the <8 x i32> view of %__a is
; compared unsigned less-than against the splat of the i32 loaded from %__b,
; and the result is ANDed with the i8 %__u reinterpreted as <8 x i1>. The
; masked <8 x i1> is widened to 64 bits by a shufflevector whose second
; operand is zeroinitializer (every index >= 8 selects a zero lane) and
; returned as i64.
; The VLX run expects the write-mask to be folded into the embedded-broadcast
; compare; the NoVLX run expects a masked zmm compare followed by per-bit
; extraction and reassembly through stack slots.
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1601:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1602:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1603:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Register-register unsigned less-than compare of the <16 x i32> views of
; %__a and %__b. The <16 x i1> result is widened to 32 bits by a
; shufflevector whose second operand is zeroinitializer (indices 16..31
; select zero lanes) and returned as i32.
; The VLX run expects one zmm compare plus kmovd; the NoVLX run expects the
; sixteen mask bits to be extracted individually with kshift/kmovw,
; re-inserted with vpinsrb, and read back through a stack slot.
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1604:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1605:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1606:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1607:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1608:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1609:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1610:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1611:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Memory-operand form: the <16 x i32> view of %__a is compared unsigned
; less-than against the <8 x i64> loaded from %__b (viewed as <16 x i32>).
; The <16 x i1> result is widened to 32 bits by a shufflevector whose second
; operand is zeroinitializer (indices 16..31 select zero lanes) and returned
; as i32.
; Both runs expect the load to be folded into the compare; the NoVLX run then
; expects per-bit mask extraction and reassembly through a stack slot.
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1612:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1613:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1614:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1615:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1616:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1617:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1618:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1619:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked register-register form: the unsigned less-than compare of the
; <16 x i32> views of %__a and %__b is ANDed with the i16 %__u reinterpreted
; as <16 x i1>. The masked result is widened to 32 bits by a shufflevector
; whose second operand is zeroinitializer (indices 16..31 select zero lanes)
; and returned as i32.
; Both runs expect %__u to be moved into %k1 and used as the compare's
; write-mask; the NoVLX run then expects per-bit mask extraction and
; reassembly through a stack slot.
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1620:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1621:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1622:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1623:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1624:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1625:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1626:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1627:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked memory-operand form: the <16 x i32> view of %__a is compared
; unsigned less-than against the <8 x i64> loaded from %__b (viewed as
; <16 x i32>), and the result is ANDed with the i16 %__u reinterpreted as
; <16 x i1>. The masked result is widened to 32 bits by a shufflevector whose
; second operand is zeroinitializer (indices 16..31 select zero lanes) and
; returned as i32.
; Both runs expect the load and the write-mask to be folded into the compare;
; the NoVLX run then expects per-bit mask extraction and reassembly through
; a stack slot.
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1628:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1629:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1630:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1631:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1632:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1633:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1634:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1635:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Broadcast memory-operand form: the <16 x i32> view of %__a is compared
; unsigned less-than against the i32 loaded from %__b and splatted to all
; sixteen lanes. The <16 x i1> result is widened to 32 bits by a
; shufflevector whose second operand is zeroinitializer (indices 16..31
; select zero lanes) and returned as i32.
; Both runs expect the splat to be folded into the compare as a {1to16}
; embedded broadcast; the NoVLX run then expects per-bit mask extraction and
; reassembly through a stack slot.
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1636:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1637:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1638:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1639:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1640:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1641:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1642:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1643:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked unsigned less-than compare of the sixteen i32 lanes of %__a against a
; scalar loaded from %__b and splatted to every lane. The <16 x i1> result is
; ANDed with the bits of %__u, widened to <32 x i1> (shuffle indices 16-31 pick
; lanes of the zeroinitializer operand, so the upper lanes are zero) and
; returned as an i32.
; With AVX512VL/BW/DQ the expected code is one broadcast-memory vpcmpltud under
; mask %k1. With AVX512F alone the expected code extracts the mask bits one at
; a time, rebuilds them with vpinsrb and reads the result back from the stack.
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1644:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1645:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1646:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1647:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1648:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1649:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1650:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1651:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %3, %2
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of the sixteen i32 lanes of %__a against those of
; %__b. The <16 x i1> result is widened to <64 x i1>: shuffle indices 0-15 keep
; the compare lanes and every index >= 16 picks a lane of the zeroinitializer
; operand, so lanes 16-63 are zero. The vector is returned as an i64.
; With AVX512VL/BW/DQ the expected code is a single register-register
; vpcmpltud. With AVX512F alone the expected code extracts the mask bits one at
; a time, rebuilds them with vpinsrb and assembles the i64 from stack slots.
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1652:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1653:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1654:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1655:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1656:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1657:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1658:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1659:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unsigned less-than compare of the sixteen i32 lanes of %__a against a full
; 512-bit vector loaded from %__b. The <16 x i1> result is widened to
; <64 x i1>: shuffle indices 0-15 keep the compare lanes and every index >= 16
; picks a lane of the zeroinitializer operand, so lanes 16-63 are zero. The
; vector is returned as an i64.
; With AVX512VL/BW/DQ the expected code is a single vpcmpltud with the load
; folded into its memory operand. With AVX512F alone the load is still folded,
; but the mask bits are then extracted one at a time, rebuilt with vpinsrb and
; the i64 is assembled from stack slots.
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1660:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1661:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1662:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1663:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1664:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1665:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1666:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1667:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked unsigned less-than compare of the sixteen i32 lanes of %__a against
; those of %__b. The <16 x i1> result is ANDed with the bits of %__u and
; widened to <64 x i1>: shuffle indices 0-15 keep the masked lanes and every
; index >= 16 picks a lane of the zeroinitializer operand, so lanes 16-63 are
; zero. The vector is returned as an i64.
; With AVX512VL/BW/DQ the expected code is one vpcmpltud under mask %k1. With
; AVX512F alone the masked compare is followed by per-bit extraction, a
; vpinsrb rebuild and assembly of the i64 from stack slots.
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1668:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1669:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1670:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1671:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1672:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1673:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1674:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1675:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %1 = bitcast <8 x i64> %__b to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked unsigned less-than compare of the sixteen i32 lanes of %__a against a
; full 512-bit vector loaded from %__b. The <16 x i1> result is ANDed with the
; bits of %__u and widened to <64 x i1>: shuffle indices 0-15 keep the masked
; lanes and every index >= 16 picks a lane of the zeroinitializer operand, so
; lanes 16-63 are zero. The vector is returned as an i64.
; With AVX512VL/BW/DQ the expected code is one vpcmpltud with a folded memory
; operand under mask %k1. With AVX512F alone the masked compare is followed by
; per-bit extraction, a vpinsrb rebuild and assembly of the i64 from stack
; slots.
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1676:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1677:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1678:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1679:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1680:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1681:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1682:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1683:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x i32>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of the sixteen i32 lanes of %__a against a scalar
; loaded from %__b and splatted to every lane. The <16 x i1> result is widened
; to <64 x i1>: shuffle indices 0-15 keep the compare lanes and every index
; >= 16 picks a lane of the zeroinitializer operand, so lanes 16-63 are zero.
; The vector is returned as an i64.
; With AVX512VL/BW/DQ the expected code is a single vpcmpltud with a {1to16}
; broadcast memory operand. With AVX512F alone the broadcast compare is
; followed by per-bit extraction, a vpinsrb rebuild and assembly of the i64
; from stack slots.
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1684:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1685:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1686:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1687:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1688:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1689:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1690:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1691:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <16 x i32> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked unsigned less-than compare of 16 x i32 against a broadcast scalar load.
; %__a is reinterpreted as <16 x i32>, the i32 loaded from %__b is splatted to
; all 16 lanes, and the per-lane `icmp ult` result is ANDed with %__u viewed as
; <16 x i1>. The 16-lane result is widened to 64 lanes and returned as an i64.
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1692:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1693:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1694:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: .Lcfi1695:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1696:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1697:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1698:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1699:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x i32>
|
|
%load = load i32, i32* %__b
|
|
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
|
|
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <16 x i32> %0, %1
|
|
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %3, %2
|
|
; Indices 0-15 pick the masked compare bits; indices 16-31 pick lanes of the
; zeroinitializer operand, so result lanes 16-63 are always zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of two <2 x i64> register operands.
; The 2-lane `icmp ult` result is widened to 4 lanes and returned as an i4.
; The two leading bitcasts are same-type casts and do not change the values.
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
; Indices 0-1 pick the compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper two result lanes are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Unsigned less-than compare of a <2 x i64> register operand against a full
; <2 x i64> vector loaded from %__b.
; The 2-lane `icmp ult` result is widened to 4 lanes and returned as an i4.
; The bitcasts of %__a and %load are same-type casts.
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
; Indices 0-1 pick the compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper two result lanes are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of two <2 x i64> register operands.
; %__u is viewed as <8 x i1>; its lanes 0-1 are extracted and ANDed with the
; 2-lane `icmp ult` result. The result is widened to 4 lanes and returned as
; an i4. The bitcasts of %__a and %__b are same-type casts.
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Indices 0-1 pick the masked compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper two result lanes are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare of a <2 x i64> register operand against a
; full <2 x i64> vector loaded from %__b.
; %__u is viewed as <8 x i1>; its lanes 0-1 are extracted and ANDed with the
; 2-lane `icmp ult` result. The result is widened to 4 lanes and returned as
; an i4. The bitcasts of %__a and %load are same-type casts.
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Indices 0-1 pick the masked compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper two result lanes are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of a <2 x i64> register operand against a
; broadcast scalar: the i64 loaded from %__b is splatted to both lanes.
; The 2-lane `icmp ult` result is widened to 4 lanes and returned as an i4.
; The bitcast of %__a is a same-type cast.
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
; Indices 0-1 pick the compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper two result lanes are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of a <2 x i64> register operand against a
; broadcast scalar: the i64 loaded from %__b is splatted to both lanes.
; %__u is viewed as <8 x i1>; its lanes 0-1 are extracted and ANDed with the
; 2-lane `icmp ult` result (mask is the first `and` operand here). The result
; is widened to 4 lanes and returned as an i4.
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
; Indices 0-1 pick the masked compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so the upper two result lanes are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of two <2 x i64> register operands.
; The 2-lane `icmp ult` result is widened to 8 lanes and returned as an i8.
; The two leading bitcasts are same-type casts and do not change the values.
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
; Indices 0-1 pick the compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so result lanes 2-7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unsigned less-than compare of a <2 x i64> register operand against a full
; <2 x i64> vector loaded from %__b.
; The 2-lane `icmp ult` result is widened to 8 lanes and returned as an i8.
; The bitcasts of %__a and %load are same-type casts.
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
; Indices 0-1 pick the compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so result lanes 2-7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of two <2 x i64> register operands.
; %__u is viewed as <8 x i1>; its lanes 0-1 are extracted and ANDed with the
; 2-lane `icmp ult` result. The result is widened to 8 lanes and returned as
; an i8. The bitcasts of %__a and %__b are same-type casts.
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Indices 0-1 pick the masked compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so result lanes 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare of a <2 x i64> register operand against a
; full <2 x i64> vector loaded from %__b.
; %__u is viewed as <8 x i1>; its lanes 0-1 are extracted and ANDed with the
; 2-lane `icmp ult` result. The result is widened to 8 lanes and returned as
; an i8. The bitcasts of %__a and %load are same-type casts.
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
; Indices 0-1 pick the masked compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so result lanes 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of a <2 x i64> register operand against a
; broadcast scalar: the i64 loaded from %__b is splatted to both lanes.
; The 2-lane `icmp ult` result is widened to 8 lanes and returned as an i8.
; The bitcast of %__a is a same-type cast.
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
; Indices 0-1 pick the compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so result lanes 2-7 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of a <2 x i64> register operand against a
; broadcast scalar: the i64 loaded from %__b is splatted to both lanes.
; %__u is viewed as <8 x i1>; its lanes 0-1 are extracted and ANDed with the
; 2-lane `icmp ult` result (mask is the first `and` operand here). The result
; is widened to 8 lanes and returned as an i8.
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
; Indices 0-1 pick the masked compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so result lanes 2-7 are zero.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of two <2 x i64> register operands.
; The 2-lane `icmp ult` result is widened to 16 lanes and returned as an i16.
; The two leading bitcasts are same-type casts and do not change the values.
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
; Indices 0-1 pick the compare bits; indices 2-3 pick lanes of the
; zeroinitializer operand, so result lanes 2-15 are zero.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; icmp ult of a <2 x i64> register operand against a <2 x i64> loaded from
; %__b; the <2 x i1> result is widened to <16 x i1> with zero upper lanes and
; returned as a zero-extended i16.
; Expected lowering: the AVX512VL run folds the load into vpcmpltuq; the
; AVX512F-only run flips the sign bits (the load is folded into the second
; vpxor with 2^63) and uses the signed vpcmpgtq instead.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked variant: icmp ult on two <2 x i64> register operands, ANDed with the
; low two bits of %__u, widened to <16 x i1> with zero upper lanes and returned
; as a zero-extended i16.
; Expected lowering: the AVX512VL run moves %__u into %k1 and uses it as the
; write-mask of vpcmpltuq; the AVX512F-only run emulates the unsigned compare
; (sign-bit flip via vpxor with 2^63, then signed vpcmpgtq) and applies the two
; mask bits with a vector vpand.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp ult <2 x i64> %0, %1
  ; Only lanes 0 and 1 of the <8 x i1> view of %__u take part in the mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked variant with a memory operand: icmp ult of a <2 x i64> register
; operand against a <2 x i64> loaded from %__b, ANDed with the low two bits of
; %__u, widened to <16 x i1> with zero upper lanes and returned as a
; zero-extended i16.
; Expected lowering: the AVX512VL run moves %__u into %k1 and uses it as the
; write-mask of a load-folding vpcmpltuq; the AVX512F-only run emulates the
; unsigned compare (sign-bit flip via vpxor with 2^63, then signed vpcmpgtq)
; and applies the two mask bits with a vector vpand.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp ult <2 x i64> %0, %1
  ; Only lanes 0 and 1 of the <8 x i1> view of %__u take part in the mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Broadcast variant: icmp ult of a <2 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to both lanes; the <2 x i1> result is
; widened to <16 x i1> with zero upper lanes and returned as a zero-extended
; i16.
; Expected lowering: the AVX512VL run uses vpcmpltuq with an embedded {1to2}
; broadcast; the AVX512F-only run broadcasts with vpbroadcastq, flips the sign
; bits (vpxor with 2^63) and uses the signed vpcmpgtq instead.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar into both lanes of the right-hand operand.
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked broadcast variant: icmp ult of a <2 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to both lanes, ANDed with the low
; two bits of %__u, widened to <16 x i1> with zero upper lanes and returned as
; a zero-extended i16.
; Expected lowering: the AVX512VL run moves %__u into %k1 and uses it as the
; write-mask of vpcmpltuq with an embedded {1to2} broadcast; the AVX512F-only
; run broadcasts with vpbroadcastq, emulates the unsigned compare (sign-bit
; flip via vpxor with 2^63, then signed vpcmpgtq) and applies the two mask bits
; with a vector vpand.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar into both lanes of the right-hand operand.
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp ult <2 x i64> %0, %1
  ; Only lanes 0 and 1 of the <8 x i1> view of %__u take part in the mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  ; The mask is the first operand of this `and`; keep that order, the expected
  ; vpand operand order above depends on it.
  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; icmp ult on two <2 x i64> register operands; the <2 x i1> result is widened
; to <32 x i1> with zero upper lanes and returned as a zero-extended i32.
; Expected lowering: the AVX512VL run uses a single vpcmpltuq into a mask
; register; the AVX512F-only run emulates the unsigned compare (sign-bit flip
; via vpxor with 2^63, then signed vpcmpgtq), stores two 16-bit mask words to
; an aligned stack slot and reloads them as one 32-bit value.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1700:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1701:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1702:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; icmp ult of a <2 x i64> register operand against a <2 x i64> loaded from
; %__b; the <2 x i1> result is widened to <32 x i1> with zero upper lanes and
; returned as a zero-extended i32.
; Expected lowering: the AVX512VL run folds the load into vpcmpltuq; the
; AVX512F-only run emulates the unsigned compare (sign-bit flip via vpxor with
; 2^63, load folded into the second vpxor, then signed vpcmpgtq), stores two
; 16-bit mask words to an aligned stack slot and reloads them as one 32-bit
; value.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1703:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1704:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1705:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked variant: icmp ult on two <2 x i64> register operands, ANDed with the
; low two bits of %__u, widened to <32 x i1> with zero upper lanes and returned
; as a zero-extended i32.
; Expected lowering: the AVX512VL run moves %__u into %k1 and uses it as the
; write-mask of vpcmpltuq; the AVX512F-only run emulates the unsigned compare
; (sign-bit flip via vpxor with 2^63, then signed vpcmpgtq), applies the two
; mask bits with a vector vpand, stores two 16-bit mask words to an aligned
; stack slot and reloads them as one 32-bit value.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1706:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1707:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1708:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp ult <2 x i64> %0, %1
  ; Only lanes 0 and 1 of the <8 x i1> view of %__u take part in the mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked variant with a memory operand: icmp ult of a <2 x i64> register
; operand against a <2 x i64> loaded from %__b, ANDed with the low two bits of
; %__u, widened to <32 x i1> with zero upper lanes and returned as a
; zero-extended i32.
; Expected lowering: the AVX512VL run moves %__u into %k1 and uses it as the
; write-mask of a load-folding vpcmpltuq; the AVX512F-only run emulates the
; unsigned compare (sign-bit flip via vpxor with 2^63, then signed vpcmpgtq),
; applies the two mask bits with a vector vpand, stores two 16-bit mask words
; to an aligned stack slot and reloads them as one 32-bit value.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1709:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1710:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1711:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp ult <2 x i64> %0, %1
  ; Only lanes 0 and 1 of the <8 x i1> view of %__u take part in the mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Broadcast variant: icmp ult of a <2 x i64> register operand against a scalar
; i64 loaded from %__b and splatted to both lanes; the <2 x i1> result is
; widened to <32 x i1> with zero upper lanes and returned as a zero-extended
; i32.
; Expected lowering: the AVX512VL run uses vpcmpltuq with an embedded {1to2}
; broadcast; the AVX512F-only run broadcasts with vpbroadcastq, emulates the
; unsigned compare (sign-bit flip via vpxor with 2^63, then signed vpcmpgtq),
; stores two 16-bit mask words to an aligned stack slot and reloads them as one
; 32-bit value.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1712:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1713:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1714:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar into both lanes of the right-hand operand.
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp ult <2 x i64> %0, %1
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked broadcast variant: icmp ult of a <2 x i64> register operand against a
; scalar i64 loaded from %__b and splatted to both lanes, ANDed with the low
; two bits of %__u, widened to <32 x i1> with zero upper lanes and returned as
; a zero-extended i32.
; Expected lowering: the AVX512VL run moves %__u into %k1 and uses it as the
; write-mask of vpcmpltuq with an embedded {1to2} broadcast; the AVX512F-only
; run broadcasts with vpbroadcastq, emulates the unsigned compare (sign-bit
; flip via vpxor with 2^63, then signed vpcmpgtq), applies the two mask bits
; with a vector vpand, stores two 16-bit mask words to an aligned stack slot
; and reloads them as one 32-bit value.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1715:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1716:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1717:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  ; Splat the loaded scalar into both lanes of the right-hand operand.
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp ult <2 x i64> %0, %1
  ; Only lanes 0 and 1 of the <8 x i1> view of %__u take part in the mask.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  ; The mask is the first operand of this `and`; keep that order, the expected
  ; vpand operand order above depends on it.
  %4 = and <2 x i1> %extract.i, %2
  ; Mask indices 2 and 3 select lanes of the zeroinitializer operand, so every
  ; result lane above lane 1 is 0.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1718:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1719:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1720:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1721:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1722:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1723:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1724:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1725:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1726:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%1 = bitcast <2 x i64> %__b to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1727:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1728:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1729:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x i64>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %2, %extract.i
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1730:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1731:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1732:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1733:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1734:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1735:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
|
|
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = icmp ult <2 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = and <2 x i1> %extract.i, %2
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Test: unsigned less-than (icmp ult) on two <4 x i64> register operands. The
; <4 x i1> result is widened to <8 x i1> (lanes 4-7 come from zeroinitializer)
; and returned as an i8 bitmask.
; The VLX run expects a single ymm vpcmpltuq into a mask register. The NoVLX
; run expects the compare to be emulated by XORing both operands with a
; broadcast 2^63 and using the signed vpcmpgtq, then the result bits to be
; pulled out one at a time (vpextrb) and merged with vpermi2q before the final
; vptestmq produces the mask.
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; they do not change the values.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Widen to 8 lanes: indices 0-3 select the compare bits, while indices 4-7
; select the zeroinitializer operand, so lanes 4-7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Test: unsigned less-than (icmp ult) on <4 x i64>, with the right-hand operand
; loaded from memory through %__b. The <4 x i1> result is widened to <8 x i1>
; (lanes 4-7 come from zeroinitializer) and returned as an i8 bitmask.
; The VLX run expects a single ymm vpcmpltuq with the load folded into it. The
; NoVLX run expects the sign-flip (XOR with broadcast 2^63, load folded into
; the second vpxor) + vpcmpgtq emulation, followed by the per-element
; vpextrb / vpermi2q merge sequence and a final vptestmq.
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
; Full-vector load that supplies the right-hand compare operand.
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Widen to 8 lanes: indices 0-3 select the compare bits, while indices 4-7
; select the zeroinitializer operand, so lanes 4-7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Test: masked unsigned less-than (icmp ult) on two <4 x i64> register operands.
; Elements 0-3 of the <8 x i1> view of %__u are ANDed with the compare result;
; the <4 x i1> value is widened to <8 x i1> (lanes 4-7 come from
; zeroinitializer) and returned as an i8 bitmask.
; The VLX run expects %__u to be moved into %k1 and used as the write-mask of a
; single ymm vpcmpltuq. The NoVLX run expects the sign-flip + vpcmpgtq
; emulation, four mask bits extracted with kshift pairs and packed with
; vmovd/vpinsrb, a vpand, then the per-element vpextrb / vpermi2q merge.
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; they do not change the values.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 8 lanes: indices 0-3 select the masked compare bits, while indices
; 4-7 select the zeroinitializer operand, so lanes 4-7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Test: masked unsigned less-than (icmp ult) on <4 x i64>, with the right-hand
; operand loaded from memory through %__b. Elements 0-3 of the <8 x i1> view of
; %__u are ANDed with the compare result; the <4 x i1> value is widened to
; <8 x i1> (lanes 4-7 come from zeroinitializer) and returned as an i8 bitmask.
; The VLX run expects a write-masked ymm vpcmpltuq with the load folded into
; it. The NoVLX run expects the sign-flip + vpcmpgtq emulation (load folded
; into the second vpxor), four mask bits extracted with kshift pairs and packed
; with vmovd/vpinsrb, a vpand, then the per-element vpextrb / vpermi2q merge.
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
; Full-vector load that supplies the right-hand compare operand.
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0-3.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Widen to 8 lanes: indices 0-3 select the masked compare bits, while indices
; 4-7 select the zeroinitializer operand, so lanes 4-7 are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
; Test: unsigned less-than (icmp ult) of a <4 x i64> register operand against a
; scalar i64 loaded through %__b and splatted to all four lanes. The <4 x i1>
; result is widened to <8 x i1> (lanes 4-7 come from zeroinitializer) and
; returned as an i8 bitmask.
; The VLX run expects the splat to be folded into vpcmpltuq as a {1to4}
; broadcast memory operand. The NoVLX run expects an explicit vpbroadcastq,
; the sign-flip (XOR with broadcast 2^63) + vpcmpgtq emulation, and the
; per-element vpextrb / vpermi2q merge sequence ending in vptestmq.
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
; Scalar load, then insert into lane 0 and shuffle with an all-zero index mask
; to replicate the scalar into all four lanes.
%load = load i64, i64* %__b
|
|
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Widen to 8 lanes: indices 0-3 select the compare bits, while indices 4-7
; select the zeroinitializer operand, so lanes 4-7 are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against one i64 loaded from %__b
; and splatted to all four lanes. The low four bits of %__u gate the <4 x i1>
; result, which is padded with zero lanes to <8 x i1> and returned as an i8.
; The VLX run expects a single write-masked vpcmpltuq with a {1to4} embedded
; broadcast. The NoVLX run expects the compare to be emulated by flipping the
; sign bit of both operands and using vpcmpgtq, with the mask rebuilt one lane
; at a time.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Load one scalar and splat it to all four lanes.
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the and.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select from the zeroinitializer operand, so the upper lanes are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of two <4 x i64> register operands. The
; <4 x i1> result is padded with zero lanes to <16 x i1> and returned as an
; i16. The VLX run expects a single vpcmpltuq into a mask register. The NoVLX
; run expects the compare to be emulated by flipping the sign bit of both
; operands and using vpcmpgtq, with the mask rebuilt one lane at a time.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-15 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Unsigned less-than compare of %__a against a full <4 x i64> vector loaded
; from %__b. The <4 x i1> result is padded with zero lanes to <16 x i1> and
; returned as an i16. The VLX run expects the load to be folded into
; vpcmpltuq as a memory operand. The NoVLX run expects the load to be folded
; into the sign-bit-flipping vpxor that precedes vpcmpgtq.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-15 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked unsigned less-than compare of two <4 x i64> register operands. The
; low four bits of %__u gate the <4 x i1> result, which is padded with zero
; lanes to <16 x i1> and returned as an i16. The VLX run expects a single
; write-masked vpcmpltuq. The NoVLX run expects the compare to be emulated
; (sign-bit flip plus vpcmpgtq), the mask bits to be materialised into a
; vector and combined with vpand, and the result mask rebuilt lane by lane.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the and.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-15 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a full <4 x i64> vector
; loaded from %__b. The low four bits of %__u gate the <4 x i1> result, which
; is padded with zero lanes to <16 x i1> and returned as an i16. The VLX run
; expects a single write-masked vpcmpltuq with the load folded in as a memory
; operand. The NoVLX run expects the load folded into the sign-bit-flipping
; vpxor, followed by vpcmpgtq, a vpand with the materialised mask bits, and a
; lane-by-lane mask rebuild.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the and.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-15 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of %__a against one i64 loaded from %__b and
; splatted to all four lanes. The <4 x i1> result is padded with zero lanes
; to <16 x i1> and returned as an i16. The VLX run expects a single vpcmpltuq
; with a {1to4} embedded broadcast. The NoVLX run expects an explicit
; vpbroadcastq of the scalar, then the sign-bit flip plus vpcmpgtq emulation
; and a lane-by-lane mask rebuild.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Load one scalar and splat it to all four lanes.
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-15 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Masked unsigned less-than compare of %__a against one i64 loaded from %__b
; and splatted to all four lanes. The low four bits of %__u gate the <4 x i1>
; result, which is padded with zero lanes to <16 x i1> and returned as an
; i16. The VLX run expects a single write-masked vpcmpltuq with a {1to4}
; embedded broadcast. The NoVLX run expects an explicit vpbroadcastq, the
; sign-bit flip plus vpcmpgtq emulation, a vpand with the materialised mask
; bits, and a lane-by-lane mask rebuild.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  ; Load one scalar and splat it to all four lanes.
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the and.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-15 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of two <4 x i64> register operands. The
; <4 x i1> result is padded with zero lanes to <32 x i1> and returned as an
; i32. The VLX run expects a single vpcmpltuq into a mask register. The NoVLX
; run expects the compare to be emulated (sign-bit flip plus vpcmpgtq) and
; the 32-bit result to be assembled from two 16-bit mask stores to a
; 32-byte-aligned stack area that is then read back with one movl.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1736:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1737:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1738:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-31 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Unsigned less-than compare of %__a against a full <4 x i64> vector loaded
; from %__b. The <4 x i1> result is padded with zero lanes to <32 x i1> and
; returned as an i32. The VLX run expects the load to be folded into
; vpcmpltuq as a memory operand. The NoVLX run expects the load folded into
; the sign-bit-flipping vpxor, then vpcmpgtq, with the 32-bit result
; assembled from two 16-bit mask stores to a 32-byte-aligned stack area.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1739:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1740:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1741:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-31 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
|
|
|
|
; Masked unsigned less-than compare of two <4 x i64> register operands. The
; low four bits of %__u gate the <4 x i1> result, which is padded with zero
; lanes to <32 x i1> and returned as an i32. The VLX run expects a single
; write-masked vpcmpltuq. The NoVLX run expects the compare to be emulated
; (sign-bit flip plus vpcmpgtq), a vpand with the materialised mask bits, and
; the 32-bit result assembled from two 16-bit mask stores to a
; 32-byte-aligned stack area.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1742:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1743:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1744:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the and.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-31 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a full <4 x i64> vector
; loaded from %__b. The low four bits of %__u gate the <4 x i1> result, which
; is padded with zero lanes to <32 x i1> and returned as an i32. The VLX run
; expects a single write-masked vpcmpltuq with the load folded in as a memory
; operand. The NoVLX run expects the load folded into the sign-bit-flipping
; vpxor, then vpcmpgtq, a vpand with the materialised mask bits, and the
; 32-bit result assembled from two 16-bit mask stores to a 32-byte-aligned
; stack area.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1745:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1746:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1747:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp ult <4 x i64> %0, %1
  ; Only the low four bits of the i8 mask take part in the and.
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Indices 4-7 select from the zeroinitializer operand, so lanes 4-31 are zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to all four lanes. The four result bits are widened with zeros to
; 32 bits and returned as an i32.
; With AVX512VL enabled the expected output folds the load and splat into the
; compare as a {1to4} broadcast operand. With only avx512f the expected output
; uses a separate vpbroadcastq, flips the sign bit of both operands (vpxor
; with 2^63) and compares with the signed vpcmpgtq.
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1748:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1749:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1750:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The four result bits are widened with
; zeros to 32 bits and returned as an i32.
; With AVX512VL enabled the expected output is one masked vpcmpltuq with a
; {1to4} broadcast memory operand. With only avx512f the expected output uses
; a separate vpbroadcastq, flips the sign bit of both operands (vpxor with
; 2^63), compares with the signed vpcmpgtq, and ANDs in the low four bits of
; %__u extracted one at a time with kshiftlw/kshiftrw pairs.
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1751:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1752:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1753:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the 8-lane mask built from %__u.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-31 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of two <4 x i64> register operands. The four
; result bits are widened with zeros to 64 bits and returned as an i64.
; With AVX512VL enabled the expected output is one vpcmpltuq followed by a
; kmovq of the mask into %rax. With only avx512f the expected output flips the
; sign bit of both operands (vpxor with 2^63), compares with the signed
; vpcmpgtq, stores 16-bit mask pieces to the stack (two of them zero from
; kxorw), and assembles the i64 from two 32-bit loads with shlq/orq.
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1754:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1755:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1756:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; they do not change the values.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unsigned less-than compare of %__a against a <4 x i64> loaded from %__b.
; The four result bits are widened with zeros to 64 bits and returned as an
; i64.
; With AVX512VL enabled the expected output is one vpcmpltuq with a memory
; operand followed by kmovq. With only avx512f the expected output flips the
; sign bit of both operands (the second vpxor reads the memory operand
; directly), compares with the signed vpcmpgtq, and assembles the i64 from
; mask pieces stored on the stack.
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1757:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1758:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1759:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of two <4 x i64> register operands. The
; four result bits are widened with zeros to 64 bits and returned as an i64.
; With AVX512VL enabled the expected output is one masked vpcmpltuq followed
; by kmovq. With only avx512f the expected output flips the sign bit of both
; operands (vpxor with 2^63), compares with the signed vpcmpgtq, ANDs in the
; low four bits of %__u extracted with kshiftlw/kshiftrw pairs, and assembles
; the i64 from mask pieces stored on the stack.
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1760:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1761:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1762:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; they do not change the values.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%1 = bitcast <4 x i64> %__b to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the 8-lane mask built from %__u.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a <4 x i64> loaded from
; %__b. The four result bits are widened with zeros to 64 bits and returned
; as an i64.
; With AVX512VL enabled the expected output is one masked vpcmpltuq with a
; memory operand followed by kmovq. With only avx512f the expected output
; flips the sign bit of both operands (vpxor with 2^63), compares with the
; signed vpcmpgtq, ANDs in the low four bits of %__u extracted with
; kshiftlw/kshiftrw pairs, and assembles the i64 from mask pieces stored on
; the stack.
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1763:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1764:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1765:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x i64>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the 8-lane mask built from %__u.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %2, %extract.i
|
|
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of %__a against a scalar i64 loaded from %__b and
; splatted to all four lanes. The four result bits are widened with zeros to
; 64 bits and returned as an i64.
; With AVX512VL enabled the expected output folds the load and splat into the
; compare as a {1to4} broadcast operand, followed by kmovq. With only avx512f
; the expected output uses a separate vpbroadcastq, flips the sign bit of both
; operands (vpxor with 2^63), compares with the signed vpcmpgtq, and assembles
; the i64 from mask pieces stored on the stack.
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1766:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1767:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1768:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
; Indices 0-3 select the compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a scalar i64 loaded from
; %__b and splatted to all four lanes. The four result bits are widened with
; zeros to 64 bits and returned as an i64.
; With AVX512VL enabled the expected output is one masked vpcmpltuq with a
; {1to4} broadcast memory operand, followed by kmovq. With only avx512f the
; expected output uses a separate vpbroadcastq, flips the sign bit of both
; operands (vpxor with 2^63), compares with the signed vpcmpgtq, ANDs in the
; low four bits of %__u extracted with kshiftlw/kshiftrw pairs, and assembles
; the i64 from mask pieces stored on the stack.
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1769:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1770:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1771:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
|
|
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kmovw %edi, %k0
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
|
|
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
|
|
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: kmovw %k3, %ecx
|
|
; NoVLX-NEXT: vmovd %ecx, %xmm1
|
|
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k2, %eax
|
|
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <4 x i64> %__a to <4 x i64>
|
|
%load = load i64, i64* %__b
|
|
; Splat the loaded scalar: insert it into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <4 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
; Keep only lanes 0-3 of the 8-lane mask built from %__u.
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = and <4 x i1> %extract.i, %2
|
|
; Indices 0-3 select the masked compare bits; indices 4-7 select lanes of the
; zeroinitializer operand, so result lanes 4-63 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
; Unsigned less-than compare of two <8 x i64> register operands. The eight
; result bits are widened with zeros to 16 bits and returned as an i16.
; Both configurations expect a single vpcmpltuq on zmm registers; the expected
; output differs only in the mask-to-GPR move (kmovd with the full feature
; set, kmovw with only avx512f).
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; they do not change the values.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unsigned less-than compare of %__a against an <8 x i64> loaded from %__b.
; The eight result bits are widened with zeros to 16 bits and returned as an
; i16.
; Both configurations expect a single vpcmpltuq with the load folded in as a
; memory operand; the expected output differs only in the mask-to-GPR move
; (kmovd with the full feature set, kmovw with only avx512f).
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
; Indices 0-7 select the compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of two <8 x i64> register operands; all
; eight bits of %__u are used as the mask. The eight result bits are widened
; with zeros to 16 bits and returned as an i16.
; Both configurations expect %__u to be moved into %k1 and used as the write
; mask of a single vpcmpltuq; the expected output differs only in the kmov
; width (kmovd with the full feature set, kmovw with only avx512f).
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcasts; they do not change the values.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 0-7 select the masked compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against an <8 x i64> loaded from
; %__b; all eight bits of %__u are used as the mask. The eight result bits are
; widened with zeros to 16 bits and returned as an i16.
; Both configurations expect %__u to be moved into %k1 and used as the write
; mask of a single vpcmpltuq with a memory operand; the expected output
; differs only in the kmov width (kmovd with the full feature set, kmovw with
; only avx512f).
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Same-type bitcast; it does not change the value.
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 0-7 select the masked compare bits; indices 8-15 select lanes of the
; zeroinitializer operand, so result lanes 8-15 are all zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked unsigned less-than compare of %__a against a splat of the scalar
; i64 loaded from %__b (insertelement into lane 0, then an all-zero-index
; shufflevector). The <8 x i1> result is widened to <16 x i1> with lanes 8-15
; taken from zeroinitializer and returned as i16.
; Both run configurations expect the load to fold into vpcmpltuq as a {1to8}
; broadcast memory operand.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a splat of the scalar i64
; loaded from %__b. The <8 x i1> result is ANDed with %__u (bitcast to
; <8 x i1>; note the mask is the first AND operand here), widened to <16 x i1>
; with lanes 8-15 taken from zeroinitializer, and returned as i16.
; Both run configurations expect one masked vpcmpltuq with a {1to8} broadcast
; memory operand.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
; Unmasked unsigned less-than compare of two <8 x i64> register arguments.
; The <8 x i1> result is widened to <32 x i1>; every shuffle index >= 8 selects
; from zeroinitializer, so only lanes 0-7 carry compare bits. Returned as i32.
; The full-feature run expects just vpcmpltuq + kmovd. The avx512f-only run
; expects the mask bits to be pulled out one at a time with
; kshiftlw/kshiftrw/kmovw, reassembled with vpinsrb, turned back into a mask
; via vpmovsxbd/vpslld/vptestmd, and reloaded as a 32-bit value from a
; 32-byte-aligned stack slot.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1772:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1773:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1774:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked unsigned less-than compare of %__a against the <8 x i64> loaded
; from %__b. The <8 x i1> result is widened to <32 x i1>; every shuffle index
; >= 8 selects from zeroinitializer. Returned as i32.
; The full-feature run expects vpcmpltuq with a folded memory operand plus
; kmovd. The avx512f-only run expects the same folded compare followed by
; per-bit extraction (kshiftlw/kshiftrw/kmovw), vpinsrb reassembly,
; vpmovsxbd/vpslld/vptestmd, and a 32-bit reload from an aligned stack slot.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1775:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1776:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1777:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of two <8 x i64> register arguments.
; The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <32 x i1> with every shuffle index >= 8 selecting from zeroinitializer, and
; returned as i32.
; The full-feature run expects kmovd + masked vpcmpltuq + kmovd. The
; avx512f-only run expects the masked compare followed by per-bit extraction
; (kshiftlw/kshiftrw/kmovw), vpinsrb reassembly, vpmovsxbd/vpslld/vptestmd,
; and a 32-bit reload from an aligned stack slot.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1778:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1779:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1780:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against the <8 x i64> loaded from
; %__b. The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), widened
; to <32 x i1> with every shuffle index >= 8 selecting from zeroinitializer,
; and returned as i32.
; The full-feature run expects kmovd + masked vpcmpltuq with a folded memory
; operand + kmovd. The avx512f-only run expects the masked compare followed by
; per-bit extraction (kshiftlw/kshiftrw/kmovw), vpinsrb reassembly,
; vpmovsxbd/vpslld/vptestmd, and a 32-bit reload from an aligned stack slot.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1781:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1782:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1783:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unmasked unsigned less-than compare of %__a against a splat of the scalar
; i64 loaded from %__b (insertelement into lane 0, then an all-zero-index
; shufflevector). The <8 x i1> result is widened to <32 x i1> with every
; shuffle index >= 8 selecting from zeroinitializer, and returned as i32.
; The full-feature run expects vpcmpltuq with a {1to8} broadcast operand plus
; kmovd. The avx512f-only run expects the same broadcast compare followed by
; per-bit extraction (kshiftlw/kshiftrw/kmovw), vpinsrb reassembly,
; vpmovsxbd/vpslld/vptestmd, and a 32-bit reload from an aligned stack slot.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1784:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1785:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1786:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of %__a against a splat of the scalar i64
; loaded from %__b. The <8 x i1> result is ANDed with %__u (bitcast to
; <8 x i1>; the mask is the first AND operand here), widened to <32 x i1> with
; every shuffle index >= 8 selecting from zeroinitializer, and returned as i32.
; The full-feature run expects kmovd + masked vpcmpltuq with a {1to8}
; broadcast operand + kmovd. The avx512f-only run expects the masked broadcast
; compare followed by per-bit extraction (kshiftlw/kshiftrw/kmovw), vpinsrb
; reassembly, vpmovsxbd/vpslld/vptestmd, and a 32-bit reload from an aligned
; stack slot.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1787:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1788:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1789:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load i64, i64* %__b
|
|
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
|
|
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %3, %2
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
; Unmasked unsigned less-than compare of two <8 x i64> register arguments.
; The <8 x i1> result is widened to <64 x i1>; every shuffle index >= 8 selects
; from zeroinitializer, so only lanes 0-7 carry compare bits. Returned as i64.
; The full-feature run expects just vpcmpltuq + kmovq. The avx512f-only run
; expects a 64-byte stack reservation, three stores of a zeroed mask register,
; per-bit extraction (kshiftlw/kshiftrw/kmovw), vpinsrb reassembly,
; vpmovsxbd/vpslld/vptestmd, and a 64-bit result assembled from two 32-bit
; stack loads with shlq $32 / orq.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1790:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1791:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1792:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked unsigned less-than compare of %__a against the <8 x i64> loaded
; from %__b. The <8 x i1> result is widened to <64 x i1> with every shuffle
; index >= 8 selecting from zeroinitializer, and returned as i64.
; The full-feature run expects vpcmpltuq with a folded memory operand plus
; kmovq. The avx512f-only run expects a 64-byte stack reservation, three
; stores of a zeroed mask register, per-bit extraction
; (kshiftlw/kshiftrw/kmovw), vpinsrb reassembly, vpmovsxbd/vpslld/vptestmd,
; and a 64-bit result assembled from two 32-bit stack loads with
; shlq $32 / orq.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1793:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1794:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1795:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked unsigned less-than compare of two <8 x i64> register arguments.
; The <8 x i1> result is ANDed with %__u (bitcast to <8 x i1>), widened to
; <64 x i1> with every shuffle index >= 8 selecting from zeroinitializer, and
; returned as i64.
; The full-feature run expects kmovd + masked vpcmpltuq + kmovq. The
; avx512f-only run expects a 64-byte stack reservation, the masked compare,
; three stores of a zeroed mask register, per-bit extraction
; (kshiftlw/kshiftrw/kmovw), vpinsrb reassembly, vpmovsxbd/vpslld/vptestmd,
; and a 64-bit result assembled from two 32-bit stack loads with
; shlq $32 / orq.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1796:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1797:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1798:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x i64>
|
|
%1 = bitcast <8 x i64> %__b to <8 x i64>
|
|
%2 = icmp ult <8 x i64> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked unsigned less-than compare of <8 x i64> %__a against the <8 x i64>
; loaded from %__b. The <8 x i1> compare result is ANDed with the i8 mask
; %__u, widened to <64 x i1> (upper 56 lanes zero) and returned as an i64.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1799:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1800:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1801:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Shuffle indices 8..15 pick lanes of the zeroinitializer operand, so result
  ; lanes 8..63 are always zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; Unsigned less-than compare of <8 x i64> %__a against a single i64 loaded
; from %__b and splatted to all eight lanes. The <8 x i1> result is widened
; to <64 x i1> (upper 56 lanes zero) and returned as an i64.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1802:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1803:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1804:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  ; Shuffle indices 8..15 pick lanes of the zeroinitializer operand, so result
  ; lanes 8..63 are always zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked unsigned less-than compare of <8 x i64> %__a against a single i64
; loaded from %__b and splatted to all eight lanes. The <8 x i1> result is
; ANDed with the i8 mask %__u, widened to <64 x i1> (upper 56 lanes zero) and
; returned as an i64.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1805:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1806:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1807:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Operand order here is mask-first (%3, %2), unlike the non-broadcast masked
  ; variant; keep it as written so the IR under test is unchanged.
  %4 = and <8 x i1> %3, %2
  ; Shuffle indices 8..15 pick lanes of the zeroinitializer operand, so result
  ; lanes 8..63 are always zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
; External declaration of the 512-bit packed-float masked-compare intrinsic
; (two <16 x float> operands, an i32, an i16 and an i32; returns i16).
; NOTE(review): the operand roles (predicate / mask / rounding) are not shown
; in this part of the file - confirm against the intrinsic definition.
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
|
|
; Ordered-equal compare (fcmp oeq) of two <4 x float> values, each bitcast
; from a <2 x i64> argument. The <4 x i1> result is widened to <8 x i1>
; (upper 4 lanes zero) and returned as an i8.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  ; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so result
  ; lanes 4..7 are always zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Ordered-equal compare (fcmp oeq) of %__a against a <2 x i64> loaded from
; %__b, both reinterpreted as <4 x float>. The <4 x i1> result is widened to
; <8 x i1> (upper 4 lanes zero) and returned as an i8.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  ; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so result
  ; lanes 4..7 are always zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Ordered-equal compare (fcmp oeq) of %__a (as <4 x float>) against a single
; float loaded from %__b and splatted to all four lanes. The <4 x i1> result
; is widened to <8 x i1> (upper 4 lanes zero) and returned as an i8.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load float, float* %__b
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %vec = insertelement <4 x float> undef, float %load, i32 0
  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x float> %0, %1
  ; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so result
  ; lanes 4..7 are always zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
|
|
|
|
; Masked ordered-equal compare (fcmp oeq) of two <4 x float> values, each
; bitcast from a <2 x i64> argument. The <4 x i1> result is ANDed with the i4
; mask %__u, widened to <8 x i1> (upper 4 lanes zero) and returned as an i8.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so result
  ; lanes 4..7 are always zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
; Masked ordered-equal compare (fcmp oeq) of %__a against a <2 x i64> loaded
; from %__b, both reinterpreted as <4 x float>. The <4 x i1> result is ANDed
; with the i4 mask %__u, widened to <8 x i1> (upper 4 lanes zero) and returned
; as an i8.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so result
  ; lanes 4..7 are always zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
; Masked ordered-equal compare (fcmp oeq) of %__a (as <4 x float>) against a
; single float loaded from %__b and splatted to all four lanes. The <4 x i1>
; result is ANDed with the i4 mask %__u, widened to <8 x i1> (upper 4 lanes
; zero) and returned as an i8.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load float, float* %__b
  ; Splat the loaded scalar: insert into lane 0, then replicate lane 0.
  %vec = insertelement <4 x float> undef, float %load, i32 0
  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  ; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so result
  ; lanes 4..7 are always zero.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}
|
|
|
|
|
|
|
|
; Ordered-equal compare (fcmp oeq) of two <4 x float> values, each bitcast
; from a <2 x i64> argument. The <4 x i1> result is widened to <16 x i1>
; (upper 12 lanes zero) and returned as an i16.
; The assertion comments inside the body are autogenerated; regenerate them
; with utils/update_llc_test_checks.py rather than editing by hand.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  ; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so result
  ; lanes 4..15 are always zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
|
|
|
|
; Unmasked ordered-equal compare, memory operand, 4 lanes widened to a 16-bit mask.
; %__a is reinterpreted as <4 x float> and compared (fcmp oeq) against a
; <4 x float> loaded through %__b. The <4 x i1> result is widened to <16 x i1>
; by a shufflevector whose second operand is zeroinitializer, so result lanes
; 4-15 are zero; the widened vector is bitcast to i16 and returned.
; The assertions cover both RUN configurations (with and without AVX512VL).
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare, broadcast scalar operand, 4 lanes widened to
; a 16-bit mask.
; A single float is loaded from %__b and splatted into a <4 x float>
; (insertelement into lane 0, then a shufflevector with an all-zero index mask).
; %__a, reinterpreted as <4 x float>, is compared against that splat with
; fcmp oeq. The <4 x i1> result is widened to <16 x i1> with zeroinitializer
; supplying lanes 4-15, then bitcast to i16 and returned.
; With AVX512VL the expected code folds the scalar load as a {1to4} operand;
; without it a separate vbroadcastss is expected.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load float, float* %__b
|
|
%vec = insertelement <4 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare, register operands, 4 lanes widened to a 16-bit
; mask.
; %__a and %__b are reinterpreted as <4 x float> and compared with fcmp oeq.
; The i4 argument %__u is bitcast to <4 x i1> and ANDed with the compare
; result. The masked <4 x i1> is widened to <16 x i1> with zeroinitializer
; supplying lanes 4-15, then bitcast to i16 and returned.
; With AVX512VL the expected code applies %__u as a {%k1} write-mask on the
; compare; without it the mask is expanded to a vector and applied with vandps.
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%1 = bitcast <2 x i64> %__b to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare, memory operand, 4 lanes widened to a 16-bit
; mask.
; %__a is reinterpreted as <4 x float> and compared (fcmp oeq) against a
; <4 x float> loaded through %__b. The i4 argument %__u is bitcast to
; <4 x i1> and ANDed with the compare result. The masked <4 x i1> is widened
; to <16 x i1> with zeroinitializer supplying lanes 4-15, then bitcast to i16
; and returned.
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare, broadcast scalar operand, 4 lanes widened to a
; 16-bit mask.
; A single float is loaded from %__b and splatted into a <4 x float>
; (insertelement into lane 0, then a shufflevector with an all-zero index mask).
; %__a, reinterpreted as <4 x float>, is compared against the splat with
; fcmp oeq; the result is ANDed with %__u bitcast from i4 to <4 x i1>.
; The masked <4 x i1> is widened to <16 x i1> with zeroinitializer supplying
; lanes 4-15, then bitcast to i16 and returned.
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
|
|
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load float, float* %__b
|
|
%vec = insertelement <4 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare, register operands, 4 lanes widened to a
; 32-bit mask.
; %__a and %__b are reinterpreted as <4 x float> and compared with fcmp oeq.
; The <4 x i1> result is widened to <32 x i1> by a shufflevector whose second
; operand is zeroinitializer, so result lanes 4-31 are zero; the widened
; vector is bitcast to i32 and returned.
; Without AVX512VL the expected code builds the result through an aligned
; stack slot (frame pointer set up, %rsp realigned to 32 bytes).
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1808:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1809:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1810:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%1 = bitcast <2 x i64> %__b to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare, memory operand, 4 lanes widened to a 32-bit
; mask.
; %__a is reinterpreted as <4 x float> and compared (fcmp oeq) against a
; <4 x float> loaded through %__b. The <4 x i1> result is widened to
; <32 x i1> with zeroinitializer supplying lanes 4-31, then bitcast to i32 and
; returned.
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1811:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1812:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1813:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare, broadcast scalar operand, 4 lanes widened to
; a 32-bit mask.
; A single float is loaded from %__b and splatted into a <4 x float>
; (insertelement into lane 0, then a shufflevector with an all-zero index mask).
; %__a, reinterpreted as <4 x float>, is compared against the splat with
; fcmp oeq. The <4 x i1> result is widened to <32 x i1> with zeroinitializer
; supplying lanes 4-31, then bitcast to i32 and returned.
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1814:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1815:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1816:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load float, float* %__b
|
|
%vec = insertelement <4 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare, register operands, 4 lanes widened to a 32-bit
; mask.
; %__a and %__b are reinterpreted as <4 x float> and compared with fcmp oeq.
; The i4 argument %__u is bitcast to <4 x i1> and ANDed with the compare
; result. The masked <4 x i1> is widened to <32 x i1> with zeroinitializer
; supplying lanes 4-31, then bitcast to i32 and returned.
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1817:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1818:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1819:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%1 = bitcast <2 x i64> %__b to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare, memory operand, 4 lanes widened to a 32-bit
; mask.
; %__a is reinterpreted as <4 x float> and compared (fcmp oeq) against a
; <4 x float> loaded through %__b. The i4 argument %__u is bitcast to
; <4 x i1> and ANDed with the compare result. The masked <4 x i1> is widened
; to <32 x i1> with zeroinitializer supplying lanes 4-31, then bitcast to i32
; and returned.
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1820:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1821:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1822:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare, broadcast scalar operand, 4 lanes widened to a
; 32-bit mask.
; A single float is loaded from %__b and splatted into a <4 x float>
; (insertelement into lane 0, then a shufflevector with an all-zero index mask).
; %__a, reinterpreted as <4 x float>, is compared against the splat with
; fcmp oeq; the result is ANDed with %__u bitcast from i4 to <4 x i1>.
; The masked <4 x i1> is widened to <32 x i1> with zeroinitializer supplying
; lanes 4-31, then bitcast to i32 and returned.
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1823:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1824:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1825:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
|
|
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load float, float* %__b
|
|
%vec = insertelement <4 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare, register operands, 4 lanes widened to a
; 64-bit mask.
; %__a and %__b are reinterpreted as <4 x float> and compared with fcmp oeq.
; The <4 x i1> result is widened to <64 x i1> by a shufflevector whose second
; operand is zeroinitializer, so result lanes 4-63 are zero; the widened
; vector is bitcast to i64 and returned.
; Without AVX512VL the expected code stores 16-bit mask pieces to the stack
; and assembles the i64 from two 32-bit loads (shlq $32 + orq).
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1826:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1827:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1828:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%1 = bitcast <2 x i64> %__b to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare, memory operand, 4 lanes widened to a 64-bit
; mask.
; %__a is reinterpreted as <4 x float> and compared (fcmp oeq) against a
; <4 x float> loaded through %__b. The <4 x i1> result is widened to
; <64 x i1> with zeroinitializer supplying lanes 4-63, then bitcast to i64 and
; returned.
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1829:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1830:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1831:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked 4 x float ordered-equal compare against a broadcast scalar loaded
; from memory, widened to a 64-bit mask result.
; The IR loads one float from %__b, splats it across <4 x float> (insertelement
; followed by an all-zero-index shufflevector), compares it with %__a
; (fcmp oeq), pads the <4 x i1> result with zero lanes up to <64 x i1> and
; returns the bits as an i64.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1832:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1833:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1834:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load float, float* %__b
|
|
; Splat the loaded scalar: put it in lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <4 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
; Shuffle indices 0-3 select the compare lanes of %2; indices 4-7 select lanes
; of the zeroinitializer operand, so result lanes 4-63 are always false.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked 4 x float ordered-equal compare of two register operands, widened to a
; 64-bit mask result.
; The IR compares %__a and %__b as <4 x float> (fcmp oeq), ANDs the <4 x i1>
; result with the i4 mask %__u (bitcast to <4 x i1>), pads it with zero lanes
; up to <64 x i1> and returns the bits as an i64.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1835:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1836:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1837:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
|
|
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%1 = bitcast <2 x i64> %__b to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
; Apply the caller-supplied mask: one bit of %__u per compare lane.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Shuffle indices 0-3 select the masked compare lanes of %4; indices 4-7 select
; lanes of the zeroinitializer operand, so result lanes 4-63 are always false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked 4 x float ordered-equal compare with a full-vector memory operand,
; widened to a 64-bit mask result.
; The IR loads <2 x i64> from %__b, compares it as <4 x float> against %__a
; (fcmp oeq), ANDs the <4 x i1> result with the i4 mask %__u (bitcast to
; <4 x i1>), pads it with zero lanes up to <64 x i1> and returns the bits as
; an i64.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1838:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1839:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1840:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <4 x float>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
; Apply the caller-supplied mask: one bit of %__u per compare lane.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Shuffle indices 0-3 select the masked compare lanes of %4; indices 4-7 select
; lanes of the zeroinitializer operand, so result lanes 4-63 are always false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked 4 x float ordered-equal compare against a broadcast scalar loaded from
; memory, widened to a 64-bit mask result.
; The IR loads one float from %__b, splats it across <4 x float>, compares it
; with %__a (fcmp oeq), ANDs the <4 x i1> result with the i4 mask %__u (bitcast
; to <4 x i1>), pads it with zero lanes up to <64 x i1> and returns the bits as
; an i64.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1841:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1842:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1843:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
|
|
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <4 x float>
|
|
%load = load float, float* %__b
|
|
; Splat the loaded scalar: put it in lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <4 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x float> %0, %1
|
|
; Apply the caller-supplied mask: one bit of %__u per compare lane.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Shuffle indices 0-3 select the masked compare lanes of %4; indices 4-7 select
; lanes of the zeroinitializer operand, so result lanes 4-63 are always false.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked 8 x float ordered-equal compare of two register operands, widened to
; a 16-bit mask result.
; The IR compares %__a and %__b as <8 x float> (fcmp oeq), pads the <8 x i1>
; result with zero lanes up to <16 x i1> and returns the bits as an i16.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%1 = bitcast <4 x i64> %__b to <8 x float>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Shuffle indices 0-7 select the compare lanes of %2; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-15 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked 8 x float ordered-equal compare with a full-vector memory operand,
; widened to a 16-bit mask result.
; The IR loads <4 x i64> from %__b, compares it as <8 x float> against %__a
; (fcmp oeq), pads the <8 x i1> result with zero lanes up to <16 x i1> and
; returns the bits as an i16.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x float>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Shuffle indices 0-7 select the compare lanes of %2; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-15 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked 8 x float ordered-equal compare against a broadcast scalar loaded
; from memory, widened to a 16-bit mask result.
; The IR loads one float from %__b, splats it across <8 x float>, compares it
; with %__a (fcmp oeq), pads the <8 x i1> result with zero lanes up to
; <16 x i1> and returns the bits as an i16.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%load = load float, float* %__b
|
|
; Splat the loaded scalar: put it in lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <8 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Shuffle indices 0-7 select the compare lanes of %2; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-15 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked 8 x float ordered-equal compare of two register operands, widened to a
; 16-bit mask result.
; The IR compares %__a and %__b as <8 x float> (fcmp oeq), ANDs the <8 x i1>
; result with the i8 mask %__u (bitcast to <8 x i1>), pads it with zero lanes
; up to <16 x i1> and returns the bits as an i16.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%1 = bitcast <4 x i64> %__b to <8 x float>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Apply the caller-supplied mask: one bit of %__u per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 select the masked compare lanes of %4; indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-15 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked 8 x float ordered-equal compare with a full-vector memory operand,
; widened to a 16-bit mask result.
; The IR loads <4 x i64> from %__b, compares it as <8 x float> against %__a
; (fcmp oeq), ANDs the <8 x i1> result with the i8 mask %__u (bitcast to
; <8 x i1>), pads it with zero lanes up to <16 x i1> and returns the bits as
; an i16.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x float>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Apply the caller-supplied mask: one bit of %__u per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 select the masked compare lanes of %4; indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-15 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked 8 x float ordered-equal compare against a broadcast scalar loaded from
; memory, widened to a 16-bit mask result.
; The IR loads one float from %__b, splats it across <8 x float>, compares it
; with %__a (fcmp oeq), ANDs the <8 x i1> result with the i8 mask %__u (bitcast
; to <8 x i1>), pads it with zero lanes up to <16 x i1> and returns the bits as
; an i16.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%load = load float, float* %__b
|
|
; Splat the loaded scalar: put it in lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <8 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Apply the caller-supplied mask: one bit of %__u per compare lane.
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Shuffle indices 0-7 select the masked compare lanes of %4; indices 8-15 select
; lanes of the zeroinitializer operand, so result lanes 8-15 are always false.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked 8 x float ordered-equal compare of two register operands, widened to
; a 32-bit mask result.
; The IR compares %__a and %__b as <8 x float> (fcmp oeq), pads the <8 x i1>
; result with zero lanes up to <32 x i1> and returns the bits as an i32.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1844:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1845:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1846:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%1 = bitcast <4 x i64> %__b to <8 x float>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Shuffle indices 0-7 select the compare lanes of %2; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-31 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked 8 x float ordered-equal compare with a full-vector memory operand,
; widened to a 32-bit mask result.
; The IR loads <4 x i64> from %__b, compares it as <8 x float> against %__a
; (fcmp oeq), pads the <8 x i1> result with zero lanes up to <32 x i1> and
; returns the bits as an i32.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1847:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1848:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1849:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <8 x float>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Shuffle indices 0-7 select the compare lanes of %2; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-31 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked 8 x float ordered-equal compare against a broadcast scalar loaded
; from memory, widened to a 32-bit mask result.
; The IR loads one float from %__b, splats it across <8 x float>, compares it
; with %__a (fcmp oeq), pads the <8 x i1> result with zero lanes up to
; <32 x i1> and returns the bits as an i32.
; The check lines in the body are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1850:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1851:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1852:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
|
|
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <8 x float>
|
|
%load = load float, float* %__b
|
|
; Splat the loaded scalar: put it in lane 0, then replicate lane 0 to all lanes.
%vec = insertelement <8 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x float> %0, %1
|
|
; Shuffle indices 0-7 select the compare lanes of %2; indices 8-15 select lanes
; of the zeroinitializer operand, so result lanes 8-31 are always false.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare of two register operands.
;
; %__a and %__b are reinterpreted as <8 x float> and compared with fcmp oeq.
; The 8 result bits are ANDed with the bits of %__u, widened to 32 lanes (the
; upper 24 lanes come from a zero vector) and returned as an i32.
;
; Expected code:
;  * AVX512VL run line: one write-masked ymm vcmpeqps, then kmovd.
;  * AVX512F-only run line: the compare is done on zmm registers, each of the
;    low 8 mask bits is isolated with kshiftlw/kshiftrw, moved to a GPR and
;    re-inserted with vpinsrb; the rebuilt mask is stored to the stack and read
;    back as a 32-bit value. A zeroed mask (kxorw) is stored to a second stack
;    slot first, presumably supplying the upper 16 bits of that load.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1853:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1854:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1855:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  %1 = bitcast <4 x i64> %__b to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Apply the caller-supplied write mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are always 0.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked ordered-equal compare of a register operand against a vector loaded
; from memory.
;
; %__a and the <4 x i64> loaded from %__b are reinterpreted as <8 x float> and
; compared with fcmp oeq. The 8 result bits are ANDed with the bits of %__u,
; widened to 32 lanes (the upper 24 lanes come from a zero vector) and returned
; as an i32.
;
; Expected code:
;  * AVX512VL run line: the load is folded into a write-masked ymm vcmpeqps.
;  * AVX512F-only run line: the operand is loaded with vmovaps, compared on zmm
;    registers, and the low 8 mask bits are rebuilt one at a time
;    (kshiftlw/kshiftrw, kmovw, vpinsrb) before the mask is stored to the stack
;    and read back as a 32-bit value.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1856:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1857:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1858:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; Second compare operand comes from memory.
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Apply the caller-supplied write mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are always 0.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
; Masked ordered-equal compare of a register operand against a scalar float
; broadcast from memory.
;
; The float loaded from %__b is splatted to all 8 lanes (insertelement plus an
; all-zero shuffle mask) and compared with the <8 x float> view of %__a using
; fcmp oeq. The 8 result bits are ANDed with the bits of %__u, widened to 32
; lanes (the upper 24 lanes come from a zero vector) and returned as an i32.
;
; Expected code:
;  * AVX512VL run line: the splat is folded into the compare as a {1to8}
;    embedded-broadcast memory operand of a write-masked ymm vcmpeqps.
;  * AVX512F-only run line: the splat is materialized with vbroadcastss, the
;    compare is done on zmm registers, and the low 8 mask bits are rebuilt one
;    at a time (kshiftlw/kshiftrw, kmovw, vpinsrb) before the mask is stored to
;    the stack and read back as a 32-bit value.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1859:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1860:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1861:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load float, float* %__b
  %vec = insertelement <8 x float> undef, float %load, i32 0
  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Apply the caller-supplied write mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-31 are always 0.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare of two register operands, result widened to a
; 64-bit mask.
;
; %__a and %__b are reinterpreted as <8 x float> and compared with fcmp oeq.
; The 8 result bits are widened to 64 lanes (the upper 56 lanes come from a
; zero vector) and returned as an i64.
;
; Expected code:
;  * AVX512VL run line: one ymm vcmpeqps into a mask register, then kmovq.
;  * AVX512F-only run line: the compare is done on zmm registers, each of the
;    low 8 mask bits is isolated with kshiftlw/kshiftrw, moved to a GPR and
;    re-inserted with vpinsrb; the rebuilt mask is stored to the stack. The
;    i64 result is assembled from two 32-bit stack loads (shlq $32 + orq). A
;    zeroed mask (kxorw) is stored to three other stack slots first,
;    presumably supplying the remaining 48 bits.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1862:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1863:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1864:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  %1 = bitcast <4 x i64> %__b to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are always 0.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unmasked ordered-equal compare of a register operand against a vector loaded
; from memory, result widened to a 64-bit mask.
;
; %__a and the <4 x i64> loaded from %__b are reinterpreted as <8 x float> and
; compared with fcmp oeq. The 8 result bits are widened to 64 lanes (the upper
; 56 lanes come from a zero vector) and returned as an i64.
;
; Expected code:
;  * AVX512VL run line: the load is folded into a ymm vcmpeqps, then kmovq.
;  * AVX512F-only run line: the operand is loaded with vmovaps, compared on zmm
;    registers, and the low 8 mask bits are rebuilt one at a time
;    (kshiftlw/kshiftrw, kmovw, vpinsrb) and stored to the stack. The i64
;    result is assembled from two 32-bit stack loads (shlq $32 + orq).
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1865:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1866:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1867:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; Second compare operand comes from memory.
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are always 0.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Unmasked ordered-equal compare of a register operand against a scalar float
; broadcast from memory, result widened to a 64-bit mask.
;
; The float loaded from %__b is splatted to all 8 lanes (insertelement plus an
; all-zero shuffle mask) and compared with the <8 x float> view of %__a using
; fcmp oeq. The 8 result bits are widened to 64 lanes (the upper 56 lanes come
; from a zero vector) and returned as an i64.
;
; Expected code:
;  * AVX512VL run line: the splat is folded into the compare as a {1to8}
;    embedded-broadcast memory operand of a ymm vcmpeqps, then kmovq.
;  * AVX512F-only run line: the splat is materialized with vbroadcastss, the
;    compare is done on zmm registers, and the low 8 mask bits are rebuilt one
;    at a time (kshiftlw/kshiftrw, kmovw, vpinsrb) and stored to the stack.
;    The i64 result is assembled from two 32-bit stack loads (shlq $32 + orq).
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1868:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1869:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1870:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load float, float* %__b
  %vec = insertelement <8 x float> undef, float %load, i32 0
  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0-7 pick the compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are always 0.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Masked ordered-equal compare of two register operands, result widened to a
; 64-bit mask.
;
; %__a and %__b are reinterpreted as <8 x float> and compared with fcmp oeq.
; The 8 result bits are ANDed with the bits of %__u, widened to 64 lanes (the
; upper 56 lanes come from a zero vector) and returned as an i64.
;
; Expected code:
;  * AVX512VL run line: one write-masked ymm vcmpeqps, then kmovq.
;  * AVX512F-only run line: the compare is done on zmm registers, each of the
;    low 8 mask bits is isolated with kshiftlw/kshiftrw, moved to a GPR and
;    re-inserted with vpinsrb; the rebuilt mask is stored to the stack. The
;    i64 result is assembled from two 32-bit stack loads (shlq $32 + orq).
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1871:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1872:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1873:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  %1 = bitcast <4 x i64> %__b to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Apply the caller-supplied write mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are always 0.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked ordered-equal compare of a register operand against a vector loaded
; from memory, result widened to a 64-bit mask.
;
; %__a and the <4 x i64> loaded from %__b are reinterpreted as <8 x float> and
; compared with fcmp oeq. The 8 result bits are ANDed with the bits of %__u,
; widened to 64 lanes (the upper 56 lanes come from a zero vector) and returned
; as an i64.
;
; Expected code:
;  * AVX512VL run line: the load is folded into a write-masked ymm vcmpeqps,
;    then kmovq.
;  * AVX512F-only run line: the operand is loaded with vmovaps, compared on zmm
;    registers, and the low 8 mask bits are rebuilt one at a time
;    (kshiftlw/kshiftrw, kmovw, vpinsrb) and stored to the stack. The i64
;    result is assembled from two 32-bit stack loads (shlq $32 + orq).
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1874:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1875:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1876:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; Second compare operand comes from memory.
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Apply the caller-supplied write mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are always 0.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Masked ordered-equal compare of a register operand against a scalar float
; broadcast from memory, result widened to a 64-bit mask.
;
; The float loaded from %__b is splatted to all 8 lanes (insertelement plus an
; all-zero shuffle mask) and compared with the <8 x float> view of %__a using
; fcmp oeq. The 8 result bits are ANDed with the bits of %__u, widened to 64
; lanes (the upper 56 lanes come from a zero vector) and returned as an i64.
;
; Expected code:
;  * AVX512VL run line: the splat is folded into the compare as a {1to8}
;    embedded-broadcast memory operand of a write-masked ymm vcmpeqps, then
;    kmovq.
;  * AVX512F-only run line: the splat is materialized with vbroadcastss, the
;    compare is done on zmm registers, and the low 8 mask bits are rebuilt one
;    at a time (kshiftlw/kshiftrw, kmovw, vpinsrb) and stored to the stack.
;    The i64 result is assembled from two 32-bit stack loads (shlq $32 + orq).
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1877:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1878:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1879:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; Splat the loaded scalar: insert into lane 0, then shuffle lane 0 everywhere.
  %load = load float, float* %__b
  %vec = insertelement <8 x float> undef, float %load, i32 0
  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Apply the caller-supplied write mask to the compare result.
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 pick the masked compare bits; indices 8-15 pick lanes of the
  ; zeroinitializer operand, so result lanes 8-63 are always 0.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
|
|
; Unmasked register/register case. The sixteen float lanes of %__a and %__b
; are compared with fcmp oeq, the <16 x i1> result is widened to <32 x i1>
; with zero lanes, and the widened mask is returned as an i32.
; Under the first RUN line's feature set the expected code is one vcmpeqps
; into a mask register followed by kmovd. Under plain avx512f the expected
; code extracts every mask bit with kshiftlw/kshiftrw/kmovw, rebuilds a byte
; vector with vpinsrb, and turns it back into a mask with vptestmd.
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1880:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1881:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1882:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1883:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1884:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1885:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1886:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1887:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%1 = bitcast <8 x i64> %__b to <16 x float>
|
|
%2 = fcmp oeq <16 x float> %0, %1
|
|
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so the
; upper sixteen bits of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked register/memory case. Same computation as the register/register
; variant, except that the second compare operand is loaded from %__b.
; Both expected sequences fold that load into the compare as a (%rdi) memory
; operand. Under the first RUN line's feature set the result is moved out
; with a single kmovd; under plain avx512f the mask is extracted bit by bit
; and rebuilt through vpinsrb / vptestmd.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1888:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1889:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1890:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1891:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1892:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1893:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1894:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1895:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
; Full-width vector load; the expected code folds it into the compare.
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <16 x float>
|
|
%2 = fcmp oeq <16 x float> %0, %1
|
|
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so the
; upper sixteen bits of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked broadcast case. A single float is loaded from %__b, splatted to
; all sixteen lanes, and compared (fcmp oeq) against the lanes of %__a; the
; <16 x i1> result is widened to <32 x i1> with zero lanes and returned as
; an i32.
; Both expected sequences fold the load and splat into the compare as a
; (%rdi){1to16} broadcast memory operand. Under plain avx512f the resulting
; mask is then extracted bit by bit and rebuilt through vpinsrb / vptestmd.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1896:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1897:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1898:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1899:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1900:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1901:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1902:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1903:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%load = load float, float* %__b
|
|
; Splat idiom: put the scalar in lane 0, then replicate lane 0 into all
; sixteen lanes with an all-zero shuffle mask.
%vec = insertelement <16 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <16 x float> %0, %1
|
|
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so the
; upper sixteen bits of the returned mask are zero.
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked register/register case. The fcmp oeq result over the sixteen float
; lanes of %__a and %__b is ANDed with the bits of %__u, widened to
; <32 x i1> with zero lanes, and returned as an i32.
; Both expected sequences move %edi into %k1 and apply it as a {%k1}
; write-mask on the compare instead of emitting a separate AND. Under plain
; avx512f the resulting mask is then extracted bit by bit and rebuilt
; through vpinsrb / vptestmd.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1904:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1905:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1906:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1907:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1908:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1909:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1910:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1911:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%1 = bitcast <8 x i64> %__b to <16 x float>
|
|
%2 = fcmp oeq <16 x float> %0, %1
|
|
; Reinterpret the i16 argument as sixteen mask bits and AND them into the
; compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so the
; upper sixteen bits of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked register/memory case. Same computation as the masked
; register/register variant, except that the second compare operand is
; loaded from %__b.
; Both expected sequences fold the load into the compare as a (%rsi) memory
; operand and apply %__u (moved from %edi into %k1) as a {%k1} write-mask.
; Under plain avx512f the resulting mask is then extracted bit by bit and
; rebuilt through vpinsrb / vptestmd.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1912:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1913:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1914:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1915:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1916:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1917:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1918:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1919:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
; Full-width vector load; the expected code folds it into the compare.
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <16 x float>
|
|
%2 = fcmp oeq <16 x float> %0, %1
|
|
; Reinterpret the i16 argument as sixteen mask bits and AND them into the
; compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so the
; upper sixteen bits of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked broadcast case. A single float is loaded from %__b, splatted to all
; sixteen lanes, and compared (fcmp oeq) against the lanes of %__a; the
; result is ANDed with the bits of %__u, widened to <32 x i1> with zero
; lanes, and returned as an i32.
; Both expected sequences fold the load and splat into the compare as a
; (%rsi){1to16} broadcast memory operand and apply %__u (moved from %edi
; into %k1) as a {%k1} write-mask. Under plain avx512f the resulting mask is
; then extracted bit by bit and rebuilt through vpinsrb / vptestmd.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1920:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1921:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1922:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: pushq %r15
|
|
; NoVLX-NEXT: pushq %r14
|
|
; NoVLX-NEXT: pushq %r13
|
|
; NoVLX-NEXT: pushq %r12
|
|
; NoVLX-NEXT: pushq %rbx
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: .Lcfi1923:
|
|
; NoVLX-NEXT: .cfi_offset %rbx, -56
|
|
; NoVLX-NEXT: .Lcfi1924:
|
|
; NoVLX-NEXT: .cfi_offset %r12, -48
|
|
; NoVLX-NEXT: .Lcfi1925:
|
|
; NoVLX-NEXT: .cfi_offset %r13, -40
|
|
; NoVLX-NEXT: .Lcfi1926:
|
|
; NoVLX-NEXT: .cfi_offset %r14, -32
|
|
; NoVLX-NEXT: .Lcfi1927:
|
|
; NoVLX-NEXT: .cfi_offset %r15, -24
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r11d
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r14d
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r15d
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r12d
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r13d
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ebx
|
|
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vmovd %r10d, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %r10d
|
|
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: leaq -40(%rbp), %rsp
|
|
; NoVLX-NEXT: popq %rbx
|
|
; NoVLX-NEXT: popq %r12
|
|
; NoVLX-NEXT: popq %r13
|
|
; NoVLX-NEXT: popq %r14
|
|
; NoVLX-NEXT: popq %r15
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%load = load float, float* %__b
|
|
; Splat idiom: put the scalar in lane 0, then replicate lane 0 into all
; sixteen lanes with an all-zero shuffle mask.
%vec = insertelement <16 x float> undef, float %load, i32 0
|
|
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <16 x float> %0, %1
|
|
; Reinterpret the i16 argument as sixteen mask bits and AND them into the
; compare result.
%3 = bitcast i16 %__u to <16 x i1>
|
|
%4 = and <16 x i1> %2, %3
|
|
; Shuffle indices 16-31 pick lanes of the zeroinitializer operand, so the
; upper sixteen bits of the returned mask are zero.
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked {sae} case. Calls @llvm.x86.avx512.mask.cmp.ps.512 on the sixteen
; float lanes of %__a and %__b with an all-ones i16 mask (-1), then
; zero-extends the i16 result to i32.
; Both RUN configurations share the common prefix here: the expected code is
; a single vcmpleps {sae} into %k0 followed by kmovw into %eax.
; NOTE(review): the test name says "oeq" but the expected instruction is
; vcmpleps. Presumably the i32 2 operand is the compare predicate and the
; i32 8 operand selects {sae}; confirm against the intrinsic definition
; before renaming the test or changing either immediate.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
|
|
; CHECK: # BB#0: # %entry
|
|
; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
|
|
; CHECK-NEXT: kmovw %k0, %eax
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%1 = bitcast <8 x i64> %__b to <16 x float>
|
|
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
|
|
; The intrinsic already yields a 16-bit mask; widening is a plain zext, so
; the upper sixteen bits of the return value are zero.
%3 = zext i16 %2 to i32
|
|
ret i32 %3
|
|
}
|
|
|
|
; Masked {sae} case. Calls @llvm.x86.avx512.mask.cmp.ps.512 on the sixteen
; float lanes of %__a and %__b, passing %__u as the i16 mask operand, then
; zero-extends the i16 result to i32.
; Both expected sequences move %edi into %k1, issue vcmpleps {sae} with a
; {%k1} write-mask, and read the result with kmovw. They differ only in the
; first move: kmovd under the first RUN line's feature set, kmovw under
; plain avx512f.
; NOTE(review): the test name says "oeq" but the expected instruction is
; vcmpleps. Presumably the i32 2 operand is the compare predicate and the
; i32 8 operand selects {sae}; confirm against the intrinsic definition
; before renaming the test or changing either immediate.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovw %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%1 = bitcast <8 x i64> %__b to <16 x float>
|
|
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 %__u, i32 8)
|
|
; The intrinsic already yields a 16-bit mask; widening is a plain zext, so
; the upper sixteen bits of the return value are zero.
%3 = zext i16 %2 to i32
|
|
ret i32 %3
|
|
}
|
|
|
|
|
|
|
|
; Ordered-equal compare of two <16 x float> register operands whose
; <16 x i1> result is widened to <64 x i1> and returned as an i64.
; The shufflevector takes lanes 0-15 from the compare result; every other
; lane uses an index in 16-31, i.e. the zeroinitializer operand, so the upper
; 48 result bits are zero.
; With the full feature set the expected code is a single vcmpeqps plus
; kmovq. With only +avx512f the expected code moves each of the 16 mask bits
; to a GPR (kshiftlw/kshiftrw/kmovw), rebuilds them as bytes with vpinsrb,
; converts back to a mask (vpmovsxbd/vpslld/vptestmd) and assembles the i64
; from stack slots.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1928:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1929:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1930:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1931:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1932:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1933:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1934:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1935:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Ordered-equal compare of a <16 x float> register operand against a full
; vector loaded from %__b, with the <16 x i1> result widened to <64 x i1> and
; returned as an i64.
; The shufflevector takes lanes 0-15 from the compare result; every other
; lane uses an index in 16-31, i.e. the zeroinitializer operand, so the upper
; 48 result bits are zero.
; Both expected sequences fold the load into vcmpeqps as a (%rdi) memory
; operand. With only +avx512f the mask is then expanded bit by bit
; (kshiftlw/kshiftrw/kmovw, vpinsrb, vpmovsxbd/vpslld/vptestmd) and the i64
; is assembled from stack slots.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1936:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1937:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1938:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1939:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1940:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1941:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1942:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1943:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Ordered-equal compare of a <16 x float> register operand against a single
; float loaded from %__b and splatted to all 16 lanes (insertelement plus an
; all-zero shuffle mask). The <16 x i1> result is widened to <64 x i1> and
; returned as an i64.
; The widening shufflevector takes lanes 0-15 from the compare result; every
; other lane uses an index in 16-31, i.e. the zeroinitializer operand, so the
; upper 48 result bits are zero.
; Both expected sequences fold the load and splat into a (%rdi){1to16}
; broadcast memory operand. With only +avx512f the mask is then expanded bit
; by bit (kshiftlw/kshiftrw/kmovw, vpinsrb, vpmovsxbd/vpslld/vptestmd) and
; the i64 is assembled from stack slots.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1944:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1945:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1946:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1947:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1948:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1949:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1950:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1951:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load float, float* %__b
  %vec = insertelement <16 x float> undef, float %load, i32 0
  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}
|
|
|
|
; Ordered-equal compare of two <16 x float> register operands, ANDed with the
; caller-supplied mask %__u (bitcast from i16 to <16 x i1>). The result is
; widened to <64 x i1> and returned as an i64.
; The widening shufflevector takes lanes 0-15 from the masked result; every
; other lane uses an index in 16-31, i.e. the zeroinitializer operand, so the
; upper 48 result bits are zero.
; Both expected sequences fold the AND into the compare as a {%k1} write
; mask loaded from %edi. With only +avx512f the mask is then expanded bit by
; bit (kshiftlw/kshiftrw/kmovw, vpinsrb, vpmovsxbd/vpslld/vptestmd) and the
; i64 is assembled from stack slots.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1952:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1953:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1954:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1955:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1956:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1957:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1958:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1959:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %1 = bitcast <8 x i64> %__b to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Ordered-equal compare of a <16 x float> register operand against a full
; vector loaded from %__b, ANDed with the caller-supplied mask %__u (bitcast
; from i16 to <16 x i1>). The result is widened to <64 x i1> and returned as
; an i64.
; The widening shufflevector takes lanes 0-15 from the masked result; every
; other lane uses an index in 16-31, i.e. the zeroinitializer operand, so the
; upper 48 result bits are zero.
; Both expected sequences fold the load into a (%rsi) memory operand and the
; AND into a {%k1} write mask loaded from %edi. With only +avx512f the mask
; is then expanded bit by bit (kshiftlw/kshiftrw/kmovw, vpinsrb,
; vpmovsxbd/vpslld/vptestmd) and the i64 is assembled from stack slots.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1960:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1961:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1962:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1963:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1964:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1965:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1966:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1967:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <16 x float>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
; Ordered-equal compare of a <16 x float> register operand against a single
; float loaded from %__b and splatted to all 16 lanes, ANDed with the
; caller-supplied mask %__u (bitcast from i16 to <16 x i1>). The result is
; widened to <64 x i1> and returned as an i64.
; The widening shufflevector takes lanes 0-15 from the masked result; every
; other lane uses an index in 16-31, i.e. the zeroinitializer operand, so the
; upper 48 result bits are zero.
; Both expected sequences fold the load and splat into a (%rsi){1to16}
; broadcast memory operand and the AND into a {%k1} write mask loaded from
; %edi. With only +avx512f the mask is then expanded bit by bit
; (kshiftlw/kshiftrw/kmovw, vpinsrb, vpmovsxbd/vpslld/vptestmd) and the i64
; is assembled from stack slots.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1968:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1969:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1970:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1971:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1972:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1973:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1974:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1975:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <16 x float>
  %load = load float, float* %__b
  %vec = insertelement <16 x float> undef, float %load, i32 0
  %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <16 x float> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
|
|
|
|
|
|
|
|
; Unmasked 512-bit float compare through the llvm.x86.avx512.mask.cmp.ps.512
; intrinsic (mask operand -1, last operand 8); the i16 result is zero-extended
; to i64. Both runs expect one compare carrying {sae} plus a movzwl.
; NOTE(review): the name says "oeq", but the call passes comparison immediate 2
; and the expected instruction is vcmpleps - confirm this is intended.
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: movzwl %ax, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzwl %ax, %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%1 = bitcast <8 x i64> %__b to <16 x float>
|
|
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
|
|
%3 = zext i16 %2 to i64
|
|
ret i64 %3
|
|
}
|
|
|
|
; Masked form of the 512-bit float {sae} compare: %__u is passed as the mask
; operand of llvm.x86.avx512.mask.cmp.ps.512 and the i16 result is
; zero-extended to i64. Both runs expect %edi moved into %k1 and used as the
; write mask of the compare.
; NOTE(review): the name says "oeq", but the call passes comparison immediate 2
; and the expected instruction is vcmpleps - confirm this is intended.
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: movzwl %ax, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzwl %ax, %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <16 x float>
|
|
%1 = bitcast <8 x i64> %__b to <16 x float>
|
|
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 %__u, i32 8)
|
|
%3 = zext i16 %2 to i64
|
|
ret i64 %3
|
|
}
|
|
|
|
|
|
|
|
; Declaration of the 512-bit double-precision masked-compare intrinsic:
; (<8 x double>, <8 x double>, i32, i8, i32) -> i8.
; NOTE(review): the 128-bit pd tests directly after this line use plain fcmp
; and do not call it; presumably it serves 512-bit pd tests elsewhere in the
; file - confirm.
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
|
|
; fcmp oeq on two <2 x double> register operands. The <2 x i1> result is
; widened to <4 x i1> by a shuffle whose indices 2 and 3 select lanes of the
; zeroinitializer operand, then returned as i4.
; The +avx512vl run expects a single vcmpeqpd writing %k0; the avx512f-only run
; expects an xmm-register vcmpeqpd followed by vptestmd into %k0.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Memory-operand variant: the right-hand side is a <2 x i64> loaded from %__b
; and bitcast to <2 x double>. The fcmp oeq result is widened from <2 x i1> to
; <4 x i1> with zero upper lanes and returned as i4.
; Both runs expect the load folded into vcmpeqpd as a (%rdi) operand.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Broadcast variant: a scalar double is loaded from %__b and splatted to both
; lanes (insertelement + shufflevector <0, 0>) before the fcmp oeq. The result
; is widened from <2 x i1> to <4 x i1> with zero upper lanes and returned as i4.
; The +avx512vl run expects the {1to2} embedded-broadcast operand; the
; avx512f-only run expects a separate vmovddup before the compare.
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%4 = bitcast <4 x i1> %3 to i4
|
|
ret i4 %4
|
|
}
|
|
|
|
; Masked register variant: the fcmp oeq result is ANDed with %__u (an i2
; bitcast to <2 x i1>), widened to <4 x i1> with zero upper lanes, and returned
; as i4.
; The +avx512vl run expects the mask loaded into %k1 and used as the write mask
; of vcmpeqpd; the avx512f-only run expects the mask materialised as a vector
; (vpternlogq) and combined with the compare result by vandpd.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
; Masked memory-operand variant: the right-hand side is a <2 x i64> loaded from
; %__b. The fcmp oeq result is ANDed with %__u (i2 bitcast to <2 x i1>),
; widened to <4 x i1> with zero upper lanes, and returned as i4.
; Both runs expect the load folded into vcmpeqpd as a (%rsi) operand; only the
; +avx512vl run applies the mask through {%k1} on the compare itself.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
; Masked broadcast variant: a scalar double loaded from %__b is splatted to
; both lanes before the fcmp oeq. The result is ANDed with %__u (i2 bitcast to
; <2 x i1>), widened to <4 x i1> with zero upper lanes, and returned as i4.
; The +avx512vl run expects a {1to2} embedded broadcast under write mask %k1;
; the avx512f-only run expects vmovddup, an xmm compare and a vandpd.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
|
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%6 = bitcast <4 x i1> %5 to i4
|
|
ret i4 %6
|
|
}
|
|
|
|
|
|
|
|
; fcmp oeq on two <2 x double> register operands. The <2 x i1> result is
; widened to <8 x i1>: shuffle indices 0 and 1 take the compare lanes, every
; remaining index (2 or 3) takes a lane of the zeroinitializer operand. The
; result is returned as i8.
; The +avx512vl run expects a single vcmpeqpd into %k0; the avx512f-only run
; expects the two lanes to be extracted with vpextrb and recombined through
; vpermi2q and vptestmq.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Memory-operand variant of the <2 x i1> -> <8 x i1> widening test: the
; right-hand side is a <2 x i64> loaded from %__b. Upper result lanes come from
; the zeroinitializer shuffle operand; the value is returned as i8.
; Both runs expect the load folded into vcmpeqpd as a (%rdi) operand.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Broadcast variant of the <2 x i1> -> <8 x i1> widening test: a scalar double
; loaded from %__b is splatted to both lanes before the fcmp oeq. Upper result
; lanes come from the zeroinitializer shuffle operand; returned as i8.
; The +avx512vl run expects the {1to2} embedded-broadcast operand; the
; avx512f-only run expects a separate vmovddup before the compare.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked register variant of the <2 x i1> -> <8 x i1> widening test: the
; fcmp oeq result is ANDed with %__u (i2 bitcast to <2 x i1>) and then widened
; with zero upper lanes; returned as i8.
; The +avx512vl run expects the mask applied as write mask %k1 on vcmpeqpd; the
; avx512f-only run expects a vector AND (vandpd) followed by the
; vpextrb / vpermi2q / vptestmq rebuild of the mask.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked memory-operand variant of the <2 x i1> -> <8 x i1> widening test: the
; right-hand side is a <2 x i64> loaded from %__b; the fcmp oeq result is ANDed
; with %__u (i2 bitcast to <2 x i1>), widened with zero upper lanes, and
; returned as i8.
; Both runs expect the load folded into vcmpeqpd as a (%rsi) operand; only the
; +avx512vl run applies the mask through {%k1} on the compare itself.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked broadcast variant of the <2 x i1> -> <8 x i1> widening test: a scalar
; double loaded from %__b is splatted to both lanes before the fcmp oeq; the
; result is ANDed with %__u (i2 bitcast to <2 x i1>), widened with zero upper
; lanes, and returned as i8.
; The +avx512vl run expects a {1to2} embedded broadcast under write mask %k1;
; the avx512f-only run expects vmovddup, an xmm compare and a vandpd.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
|
|
; fcmp oeq on two <2 x double> register operands. The <2 x i1> result is
; widened to <16 x i1>: shuffle indices 0 and 1 take the compare lanes, every
; remaining index (2 or 3) takes a lane of the zeroinitializer operand. The
; result is returned as i16.
; The +avx512vl run expects a single vcmpeqpd into %k0; the avx512f-only run
; expects the two lanes to be extracted with vpextrb and recombined through
; vpermi2d and vptestmd.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Memory-operand variant of the <2 x i1> -> <16 x i1> widening test: the
; right-hand side is a <2 x i64> loaded from %__b. Upper result lanes come from
; the zeroinitializer shuffle operand; the value is returned as i16.
; Both runs expect the load folded into vcmpeqpd as a (%rdi) operand.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Broadcast variant of the <2 x i1> -> <16 x i1> widening test: a scalar double
; loaded from %__b is splatted to both lanes before the fcmp oeq. Upper result
; lanes come from the zeroinitializer shuffle operand; returned as i16.
; The +avx512vl run expects the {1to2} embedded-broadcast operand; the
; avx512f-only run expects a separate vmovddup before the compare.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked register variant of the <2 x i1> -> <16 x i1> widening test: the
; fcmp oeq result is ANDed with %__u (i2 bitcast to <2 x i1>) and then widened
; with zero upper lanes; returned as i16.
; The +avx512vl run expects the mask applied as write mask %k1 on vcmpeqpd; the
; avx512f-only run expects a vector AND (vandpd) followed by the
; vpextrb / vpermi2d / vptestmd rebuild of the mask.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked memory-operand variant of the <2 x i1> -> <16 x i1> widening test: the
; right-hand side is a <2 x i64> loaded from %__b; the fcmp oeq result is ANDed
; with %__u (i2 bitcast to <2 x i1>), widened with zero upper lanes, and
; returned as i16.
; Both runs expect the load folded into vcmpeqpd as a (%rsi) operand; only the
; +avx512vl run applies the mask through {%k1} on the compare itself.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare against a broadcast scalar, widened to a 16-bit
; mask. The double loaded from %__b is splatted into both lanes of a
; <2 x double> and compared (fcmp oeq) with %__a bitcast to <2 x double>; the
; <2 x i1> result is ANDed with the i2 mask %__u, padded with zero lanes up to
; <16 x i1>, and returned as an i16.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
; Load one double and splat it into both lanes (insert at lane 0, then
; shuffle with mask <0, 0>).
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
; Apply the caller-supplied 2-bit mask lane by lane.
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
; Indices 0-1 pick the two compare lanes; indices 2-3 pick lanes of the
; zeroinitializer operand, so lanes 2..15 of the result are always 0.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
|
|
; Ordered-equal compare of two <2 x double> values, widened to a 32-bit mask.
; %__a and %__b are bitcast from <2 x i64> to <2 x double> and compared with
; fcmp oeq; the <2 x i1> result is padded with zero lanes up to <32 x i1> and
; returned as an i32.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1976:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1977:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1978:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer vectors as <2 x double> so they can feed fcmp.
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
; Indices 0-1 pick the two compare lanes; indices 2-3 pick lanes of the
; zeroinitializer operand, so lanes 2..31 of the result are always 0.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Ordered-equal compare with a memory operand, widened to a 32-bit mask.
; %__a and the <2 x i64> loaded from %__b are bitcast to <2 x double> and
; compared with fcmp oeq; the <2 x i1> result is padded with zero lanes up to
; <32 x i1> and returned as an i32.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1979:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1980:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1981:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer vectors as <2 x double> so they can feed fcmp.
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
; Indices 0-1 pick the two compare lanes; indices 2-3 pick lanes of the
; zeroinitializer operand, so lanes 2..31 of the result are always 0.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Ordered-equal compare against a broadcast scalar, widened to a 32-bit mask.
; The double loaded from %__b is splatted into both lanes of a <2 x double> and
; compared (fcmp oeq) with %__a bitcast to <2 x double>; the <2 x i1> result is
; padded with zero lanes up to <32 x i1> and returned as an i32.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1982:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1983:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1984:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
; Load one double and splat it into both lanes (insert at lane 0, then
; shuffle with mask <0, 0>).
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
; Indices 0-1 pick the two compare lanes; indices 2-3 pick lanes of the
; zeroinitializer operand, so lanes 2..31 of the result are always 0.
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare of two <2 x double> values, widened to a 32-bit
; mask. %__a and %__b are bitcast from <2 x i64> to <2 x double> and compared
; with fcmp oeq; the <2 x i1> result is ANDed with the i2 mask %__u, padded
; with zero lanes up to <32 x i1>, and returned as an i32.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1985:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1986:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1987:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer vectors as <2 x double> so they can feed fcmp.
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
; Apply the caller-supplied 2-bit mask lane by lane.
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
; Indices 0-1 pick the two compare lanes; indices 2-3 pick lanes of the
; zeroinitializer operand, so lanes 2..31 of the result are always 0.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare with a memory operand, widened to a 32-bit mask.
; %__a and the <2 x i64> loaded from %__b are bitcast to <2 x double> and
; compared with fcmp oeq; the <2 x i1> result is ANDed with the i2 mask %__u,
; padded with zero lanes up to <32 x i1>, and returned as an i32.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1988:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1989:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1990:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer vectors as <2 x double> so they can feed fcmp.
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
; Apply the caller-supplied 2-bit mask lane by lane.
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
; Indices 0-1 pick the two compare lanes; indices 2-3 pick lanes of the
; zeroinitializer operand, so lanes 2..31 of the result are always 0.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare against a broadcast scalar, widened to a 32-bit
; mask. The double loaded from %__b is splatted into both lanes of a
; <2 x double> and compared (fcmp oeq) with %__a bitcast to <2 x double>; the
; <2 x i1> result is ANDed with the i2 mask %__u, padded with zero lanes up to
; <32 x i1>, and returned as an i32.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1991:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1992:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1993:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
; Load one double and splat it into both lanes (insert at lane 0, then
; shuffle with mask <0, 0>).
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
; Apply the caller-supplied 2-bit mask lane by lane.
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
; Indices 0-1 pick the two compare lanes; indices 2-3 pick lanes of the
; zeroinitializer operand, so lanes 2..31 of the result are always 0.
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
|
|
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1994:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1995:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1996:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi1997:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi1998:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi1999:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2000:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2001:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2002:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2003:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2004:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2005:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%1 = bitcast <2 x i64> %__b to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2006:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2007:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2008:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load <2 x i64>, <2 x i64>* %__b
|
|
%1 = bitcast <2 x i64> %load to <2 x double>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2009:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2010:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2011:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
|
|
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <2 x i64> %__a to <2 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <2 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
|
%2 = fcmp oeq <2 x double> %0, %1
|
|
%3 = bitcast i2 %__u to <2 x i1>
|
|
%4 = and <2 x i1> %2, %3
|
|
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal (fcmp oeq) compare of two <4 x double> register
; operands, with the 4-bit result widened to an 8-bit mask.
;   %__a, %__b : <4 x i64>, each reinterpreted as <4 x double>.
; Returns the <4 x i1> result padded with zero lanes to <8 x i1>, as i8.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Indices 0-3 select the lanes of %2; indices 4-7 select the lanes of the
; zeroinitializer operand, so result lanes 4..7 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal (fcmp oeq) compare of a <4 x double> register operand
; against a full vector loaded from memory, widened to an 8-bit mask.
;   %__a : <4 x i64>, reinterpreted as <4 x double>.
;   %__b : pointer to a <4 x i64> that is loaded and reinterpreted likewise.
; Returns the <4 x i1> result padded with zero lanes to <8 x i1>, as i8.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Indices 0-3 select the lanes of %2; indices 4-7 select the lanes of the
; zeroinitializer operand, so result lanes 4..7 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal (fcmp oeq) compare of a <4 x double> register operand
; against a scalar double that is loaded from memory and splatted to all four
; lanes, widened to an 8-bit mask.
;   %__a : <4 x i64>, reinterpreted as <4 x double>.
;   %__b : pointer to the double that is broadcast.
; Returns the <4 x i1> result padded with zero lanes to <8 x i1>, as i8.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Indices 0-3 select the lanes of %2; indices 4-7 select the lanes of the
; zeroinitializer operand, so result lanes 4..7 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i1> %3 to i8
|
|
ret i8 %4
|
|
}
|
|
|
|
; Masked ordered-equal (fcmp oeq) compare of two <4 x double> register
; operands, with the 4-bit result widened to an 8-bit mask.
;   %__u : i4 mask, bitcast to <4 x i1> and ANDed with the compare result.
;   %__a, %__b : <4 x i64>, each reinterpreted as <4 x double>.
; Returns the <4 x i1> result padded with zero lanes to <8 x i1>, as i8.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Indices 0-3 select the lanes of %4; indices 4-7 select the lanes of the
; zeroinitializer operand, so result lanes 4..7 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked ordered-equal (fcmp oeq) compare of a <4 x double> register operand
; against a full vector loaded from memory, widened to an 8-bit mask.
;   %__u : i4 mask, bitcast to <4 x i1> and ANDed with the compare result.
;   %__a : <4 x i64>, reinterpreted as <4 x double>.
;   %__b : pointer to a <4 x i64> that is loaded and reinterpreted likewise.
; Returns the <4 x i1> result padded with zero lanes to <8 x i1>, as i8.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Indices 0-3 select the lanes of %4; indices 4-7 select the lanes of the
; zeroinitializer operand, so result lanes 4..7 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
; Masked ordered-equal (fcmp oeq) compare of a <4 x double> register operand
; against a scalar double that is loaded from memory and splatted to all four
; lanes, widened to an 8-bit mask.
;   %__u : i4 mask, bitcast to <4 x i1> and ANDed with the compare result.
;   %__a : <4 x i64>, reinterpreted as <4 x double>.
;   %__b : pointer to the double that is broadcast.
; Returns the <4 x i1> result padded with zero lanes to <8 x i1>, as i8.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
|
|
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Indices 0-3 select the lanes of %4; indices 4-7 select the lanes of the
; zeroinitializer operand, so result lanes 4..7 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <8 x i1> %5 to i8
|
|
ret i8 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal (fcmp oeq) compare of two <4 x double> register
; operands, with the 4-bit result widened to a 16-bit mask.
;   %__a, %__b : <4 x i64>, each reinterpreted as <4 x double>.
; Returns the <4 x i1> result padded with zero lanes to <16 x i1>, as i16.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Indices 0-3 select the lanes of %2; the repeated indices 4-7 all select
; lanes of the zeroinitializer operand, so result lanes 4..15 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal (fcmp oeq) compare of a <4 x double> register operand
; against a full vector loaded from memory, widened to a 16-bit mask.
;   %__a : <4 x i64>, reinterpreted as <4 x double>.
;   %__b : pointer to a <4 x i64> that is loaded and reinterpreted likewise.
; Returns the <4 x i1> result padded with zero lanes to <16 x i1>, as i16.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Indices 0-3 select the lanes of %2; the repeated indices 4-7 all select
; lanes of the zeroinitializer operand, so result lanes 4..15 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal (fcmp oeq) compare of a <4 x double> register operand
; against a scalar double that is loaded from memory and splatted to all four
; lanes, widened to a 16-bit mask.
;   %__a : <4 x i64>, reinterpreted as <4 x double>.
;   %__b : pointer to the double that is broadcast.
; Returns the <4 x i1> result padded with zero lanes to <16 x i1>, as i16.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Indices 0-3 select the lanes of %2; the repeated indices 4-7 all select
; lanes of the zeroinitializer operand, so result lanes 4..15 are all zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked ordered-equal (fcmp oeq) compare of two <4 x double> register
; operands, with the 4-bit result widened to a 16-bit mask.
;   %__u : i4 mask, bitcast to <4 x i1> and ANDed with the compare result.
;   %__a, %__b : <4 x i64>, each reinterpreted as <4 x double>.
; Returns the <4 x i1> result padded with zero lanes to <16 x i1>, as i16.
; The VLX/NoVLX assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Indices 0-3 select the lanes of %4; the repeated indices 4-7 all select
; lanes of the zeroinitializer operand, so result lanes 4..15 are all zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of <4 x double> %__a against a vector loaded
; from %__b; the 4-lane result is zero-extended into an i16 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single masked vcmpeqpd with a memory operand writing a
; k-register; the NoVLX run (avx512f only) expects the per-lane
; vpextrb/vpermi2d expansion listed under its prefix.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; The second operand is read from memory, then reinterpreted the same way.
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Expand the i4 write-mask to one bit per lane and apply it to the result.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Widen to 16 lanes: indices 0-3 keep the masked result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of <4 x double> %__a against a single double
; loaded from %__b and splatted to all four lanes; the 4-lane result is
; zero-extended into an i16 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects the broadcast folded into vcmpeqpd as a {1to4} memory
; operand; the NoVLX run (avx512f only) expects a separate vbroadcastsd
; followed by the per-lane vpextrb/vpermi2d expansion.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
|
|
; NoVLX-NEXT: andl $1, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
|
|
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
|
|
; NoVLX-NEXT: korw %k0, %k1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
|
|
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
|
|
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
|
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
|
|
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Expand the i4 write-mask to one bit per lane and apply it to the result.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Widen to 16 lanes: indices 0-3 keep the masked result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare of two <4 x double> register operands; the
; 4-lane result is zero-extended into an i32 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single vcmpeqpd writing a k-register; the NoVLX run
; (avx512f only) expects the mask to be built in two 16-bit halves that are
; stored to an aligned stack slot and reloaded with one movl.
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2012:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2013:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2014:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret both integer-typed operands as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Widen to 32 lanes: indices 0-3 keep the compare result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of <4 x double> %__a against a vector loaded
; from %__b; the 4-lane result is zero-extended into an i32 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single vcmpeqpd with a memory operand writing a
; k-register; the NoVLX run (avx512f only) expects the mask to be built in two
; 16-bit halves that are stored to an aligned stack slot and reloaded with one
; movl.
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2015:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2016:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2017:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; The second operand is read from memory, then reinterpreted the same way.
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Widen to 32 lanes: indices 0-3 keep the compare result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of <4 x double> %__a against a single double
; loaded from %__b and splatted to all four lanes; the 4-lane result is
; zero-extended into an i32 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects the broadcast folded into vcmpeqpd as a {1to4} memory
; operand; the NoVLX run (avx512f only) expects a separate vbroadcastsd and a
; mask built in two 16-bit halves through an aligned stack slot.
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2018:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2019:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2020:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
|
|
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Widen to 32 lanes: indices 0-3 keep the compare result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare of two <4 x double> register operands; the
; 4-lane result is ANDed with the i4 mask %__u and zero-extended into an i32
; bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single masked vcmpeqpd writing a k-register; the NoVLX
; run (avx512f only) expects the write-mask to be materialised as a vector,
; combined with vpand, and the result built in two 16-bit halves through
; stack slots.
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2021:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2022:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2023:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret both integer-typed operands as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Expand the i4 write-mask to one bit per lane and apply it to the result.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Widen to 32 lanes: indices 0-3 keep the masked result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of <4 x double> %__a against a vector loaded
; from %__b; the 4-lane result is ANDed with the i4 mask %__u and
; zero-extended into an i32 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single masked vcmpeqpd with a memory operand; the
; NoVLX run (avx512f only) expects the write-mask to be materialised as a
; vector, combined with vpand, and the result built in two 16-bit halves
; through stack slots.
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2024:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2025:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2026:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; The second operand is read from memory, then reinterpreted the same way.
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Expand the i4 write-mask to one bit per lane and apply it to the result.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Widen to 32 lanes: indices 0-3 keep the masked result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of <4 x double> %__a against a single double
; loaded from %__b and splatted to all four lanes; the 4-lane result is ANDed
; with the i4 mask %__u and zero-extended into an i32 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects the broadcast folded into a masked vcmpeqpd as a {1to4}
; memory operand; the NoVLX run (avx512f only) expects a separate
; vbroadcastsd, a vpand with the materialised write-mask, and the result built
; in two 16-bit halves through stack slots.
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2027:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2028:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2029:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
|
|
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Expand the i4 write-mask to one bit per lane and apply it to the result.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Widen to 32 lanes: indices 0-3 keep the masked result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare of two <4 x double> register operands; the
; 4-lane result is zero-extended into an i64 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single vcmpeqpd followed by kmovq; the NoVLX run
; (avx512f only) expects 16-bit mask pieces to be stored to stack slots (two
; of them zeroed with kxorw) and the i64 assembled from two 32-bit reloads
; with shlq/orq.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2030:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2031:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2032:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret both integer-typed operands as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Widen to 64 lanes: indices 0-3 keep the compare result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of <4 x double> %__a against a vector loaded
; from %__b; the 4-lane result is zero-extended into an i64 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single vcmpeqpd with a memory operand followed by
; kmovq; the NoVLX run (avx512f only) expects 16-bit mask pieces to be stored
; to stack slots (two of them zeroed with kxorw) and the i64 assembled from
; two 32-bit reloads with shlq/orq.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2033:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2034:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2035:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; The second operand is read from memory, then reinterpreted the same way.
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Widen to 64 lanes: indices 0-3 keep the compare result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of <4 x double> %__a against a single double
; loaded from %__b and splatted to all four lanes; the 4-lane result is
; zero-extended into an i64 bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects the broadcast folded into vcmpeqpd as a {1to4} memory
; operand followed by kmovq; the NoVLX run (avx512f only) expects a separate
; vbroadcastsd, 16-bit mask pieces stored to stack slots (two of them zeroed
; with kxorw) and the i64 assembled from two 32-bit reloads with shlq/orq.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2036:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2037:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2038:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret the integer-typed first operand as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
; Load one scalar and splat it: insert into lane 0, then shuffle with an
; all-zero index mask so every lane reads lane 0.
%load = load double, double* %__b
|
|
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Widen to 64 lanes: indices 0-3 keep the compare result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare of two <4 x double> register operands; the
; 4-lane result is ANDed with the i4 mask %__u and zero-extended into an i64
; bit mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py.
; The VLX run expects a single masked vcmpeqpd followed by kmovq; the NoVLX
; run (avx512f only) expects the write-mask to be materialised as a vector and
; combined with vpand, 16-bit mask pieces stored to stack slots (two of them
; zeroed with kxorw), and the i64 assembled from two 32-bit reloads with
; shlq/orq.
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2039:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2040:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2041:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
; Reinterpret both integer-typed operands as <4 x double>.
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%1 = bitcast <4 x i64> %__b to <4 x double>
|
|
; oeq: a lane is true only when neither input is NaN and the values are equal.
%2 = fcmp oeq <4 x double> %0, %1
|
|
; Expand the i4 write-mask to one bit per lane and apply it to the result.
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Widen to 64 lanes: indices 0-3 keep the masked result, indices 4-7 select
; elements of the zeroinitializer operand, so all upper lanes are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a (viewed as <4 x double>) against a
; <4 x double> loaded from %__b. The 4 result bits are ANDed with the i4 mask
; %__u, widened to 64 bits and returned as an i64.
; The AVX512VL run (VLX prefix) expects one masked vcmpeqpd with the load folded
; as a memory operand; the AVX512F-only run (NoVLX prefix) expects a ymm compare
; whose result is rebuilt into a 64-bit value through stack slots.
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2042:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2043:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2044:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load <4 x i64>, <4 x i64>* %__b
|
|
%1 = bitcast <4 x i64> %load to <4 x double>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Indices 0-3 take the compare bits; every other index (4-7) selects from the
; zeroinitializer operand, so elements 4..63 of the widened vector are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Broadcast variant of the masked <4 x double> ordered-equal compare: a single
; double is loaded from %__b and splatted to all four lanes before the compare.
; The 4 result bits are ANDed with the i4 mask %__u, widened to 64 bits and
; returned as an i64.
; The AVX512VL run (VLX prefix) expects the broadcast folded into vcmpeqpd as
; a {1to4} memory operand; the AVX512F-only run (NoVLX prefix) expects a
; separate vbroadcastsd followed by a ymm compare.
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
|
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2045:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2046:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2047:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $96, %rsp
|
|
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: kmovw %eax, %k1
|
|
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
|
|
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
|
|
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
|
|
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
|
|
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
|
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <4 x i64> %__a to <4 x double>
|
|
%load = load double, double* %__b
|
|
; insertelement + all-zero shuffle mask is the splat (broadcast) of %load.
%vec = insertelement <4 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <4 x double> %0, %1
|
|
%3 = bitcast i4 %__u to <4 x i1>
|
|
%4 = and <4 x i1> %2, %3
|
|
; Indices 0-3 take the compare bits; every other index (4-7) selects from the
; zeroinitializer operand, so elements 4..63 of the widened vector are zero.
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare of two register operands viewed as
; <8 x double>; the 8 result bits are widened to 16 and returned as an i16.
; Both runs expect a single zmm vcmpeqpd into %k0; they differ only in the
; mask-to-GPR move (kmovd with the VLX prefix, kmovw with the NoVLX prefix).
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
; Indices 0-7 take the compare bits; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 elements are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of %__a (viewed as <8 x double>) against a
; vector loaded from %__b; the 8 result bits are widened to 16 and returned
; as an i16.
; Both runs expect the load folded into vcmpeqpd as a (%rdi) memory operand.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
; Indices 0-7 take the compare bits; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 elements are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of %__a (viewed as <8 x double>) against a
; single double loaded from %__b and splatted to all eight lanes; the 8 result
; bits are widened to 16 and returned as an i16.
; Both runs expect the broadcast folded into vcmpeqpd as a {1to8} operand.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load double, double* %__b
|
|
; insertelement + all-zero shuffle mask is the splat (broadcast) of %load.
%vec = insertelement <8 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
; Indices 0-7 take the compare bits; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 elements are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <16 x i1> %3 to i16
|
|
ret i16 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare of two register operands viewed as
; <8 x double>; the 8 result bits are ANDed with the i8 mask %__u, widened to
; 16 bits and returned as an i16.
; Both runs expect %edi moved into %k1 and used as the write mask of a single
; zmm vcmpeqpd.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 0-7 take the masked compare bits; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 elements are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a (viewed as <8 x double>) against a
; vector loaded from %__b; the 8 result bits are ANDed with the i8 mask %__u,
; widened to 16 bits and returned as an i16.
; Both runs expect a masked vcmpeqpd with the load folded as a (%rsi) operand.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 0-7 take the masked compare bits; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 elements are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a (viewed as <8 x double>) against a
; single double loaded from %__b and splatted to all eight lanes; the 8 result
; bits are ANDed with the i8 mask %__u, widened to 16 bits and returned as i16.
; Both runs expect a masked vcmpeqpd with the broadcast folded as {1to8}.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load double, double* %__b
|
|
; insertelement + all-zero shuffle mask is the splat (broadcast) of %load.
%vec = insertelement <8 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 0-7 take the masked compare bits; indices 8-15 select from the
; zeroinitializer operand, so the upper 8 elements are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <16 x i1> %5 to i16
|
|
ret i16 %6
|
|
}
|
|
|
|
|
|
|
|
; Intrinsic-based compare of two register operands viewed as <8 x double>,
; using @llvm.x86.avx512.mask.cmp.pd.512 with an all-ones mask (i8 -1); the i8
; result is zero-extended to i16 and returned.
; Both runs expect a zmm compare carrying the {sae} modifier followed by a
; movzbl of the low byte.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; immediate 2 and the expected instruction is vcmplepd - presumably inherited
; from the test generator; confirm before "fixing", since the assertions below
; would have to be regenerated.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: movzbl %al, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzbl %al, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
; Operands after the two vectors: compare immediate (2), write mask (-1, all
; lanes enabled), and a final i32 8 that corresponds to {sae} in the expected
; assembly above.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
|
|
%3 = zext i8 %2 to i16
|
|
ret i16 %3
|
|
}
|
|
|
|
; Masked intrinsic-based compare of two register operands viewed as
; <8 x double>, using @llvm.x86.avx512.mask.cmp.pd.512 with %__u as the write
; mask; the i8 result is zero-extended to i16 and returned.
; Both runs expect %edi moved into %k1, a masked zmm compare carrying the {sae}
; modifier, and a movzbl of the low byte.
; NOTE(review): the function name says "oeq", but the intrinsic is called with
; immediate 2 and the expected instruction is vcmplepd - presumably inherited
; from the test generator; confirm before "fixing", since the assertions below
; would have to be regenerated.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: movzbl %al, %eax
|
|
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzbl %al, %eax
|
|
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
; Operands after the two vectors: compare immediate (2), write mask (%__u),
; and a final i32 8 that corresponds to {sae} in the expected assembly above.
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
|
|
%3 = zext i8 %2 to i16
|
|
ret i16 %3
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare of two register operands viewed as
; <8 x double>; the 8 result bits are widened to 32 and returned as an i32.
; The AVX512VL run (VLX prefix) expects vcmpeqpd + kmovd only. The AVX512F-only
; run (NoVLX prefix) expects the mask bits to be pulled out one at a time with
; kshiftlw/kshiftrw/kmovw, reassembled with vpinsrb, converted back to a mask
; with vptestmd, and read back as 32 bits from the stack.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2048:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2049:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2050:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
; Indices 0-7 take the compare bits; every other index (8-15) selects from the
; zeroinitializer operand, so elements 8..31 of the widened vector are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of %__a (viewed as <8 x double>) against a
; vector loaded from %__b; the 8 result bits are widened to 32 and returned as
; an i32.
; Both runs expect the load folded into vcmpeqpd as a (%rdi) operand. The
; AVX512F-only run (NoVLX prefix) additionally expects the bit-by-bit
; kshift/kmovw/vpinsrb sequence and a 32-bit reload from the stack.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2051:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2052:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2053:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
; Indices 0-7 take the compare bits; every other index (8-15) selects from the
; zeroinitializer operand, so elements 8..31 of the widened vector are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of %__a (viewed as <8 x double>) against a
; single double loaded from %__b and splatted to all eight lanes; the 8 result
; bits are widened to 32 and returned as an i32.
; Both runs expect the broadcast folded into vcmpeqpd as a {1to8} operand. The
; AVX512F-only run (NoVLX prefix) additionally expects the bit-by-bit
; kshift/kmovw/vpinsrb sequence and a 32-bit reload from the stack.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2054:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2055:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2056:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load double, double* %__b
|
|
; insertelement + all-zero shuffle mask is the splat (broadcast) of %load.
%vec = insertelement <8 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
; Indices 0-7 take the compare bits; every other index (8-15) selects from the
; zeroinitializer operand, so elements 8..31 of the widened vector are zero.
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <32 x i1> %3 to i32
|
|
ret i32 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare of two register operands viewed as
; <8 x double>; the 8 result bits are ANDed with the i8 mask %__u, widened to
; 32 bits and returned as an i32.
; Both runs expect %edi moved into %k1 and used as the write mask of a zmm
; vcmpeqpd. The AVX512F-only run (NoVLX prefix) additionally expects the
; bit-by-bit kshift/kmovw/vpinsrb sequence and a 32-bit reload from the stack.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2057:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2058:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2059:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 0-7 take the masked compare bits; every other index (8-15) selects
; from the zeroinitializer operand, so elements 8..31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a (viewed as <8 x double>) against a
; vector loaded from %__b; the 8 result bits are ANDed with the i8 mask %__u,
; widened to 32 bits and returned as an i32.
; Both runs expect a masked vcmpeqpd with the load folded as a (%rsi) operand.
; The AVX512F-only run (NoVLX prefix) additionally expects the bit-by-bit
; kshift/kmovw/vpinsrb sequence and a 32-bit reload from the stack.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2060:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2061:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2062:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
; Indices 0-7 take the masked compare bits; every other index (8-15) selects
; from the zeroinitializer operand, so elements 8..31 are zero.
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a (viewed as <8 x double>) against one
; double loaded from %__b and splatted to all 8 lanes (insertelement followed
; by an all-zero-index shufflevector). The <8 x i1> result is ANDed with the
; bits of %__u, widened to <32 x i1> (shuffle indices 8-15 select lanes of the
; zeroinitializer operand, so only the low 8 bits can be set) and returned as
; an i32.
; Expected codegen: the VLX run uses a single masked {1to8} compare plus kmovd;
; the NoVLX run (avx512f only) rebuilds the mask lane by lane and reads the
; result back through a stack slot.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2063:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2064:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2065:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $32, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <8 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <32 x i1> %5 to i32
|
|
ret i32 %6
|
|
}
|
|
|
|
|
|
|
|
; Calls @llvm.x86.avx512.mask.cmp.pd.512 on %__a / %__b (viewed as
; <8 x double>) with predicate operand i32 2, an all-ones mask (i8 -1) and a
; final operand of i32 8, then zero-extends the i8 result to i32.
; Both runs expect a single "vcmplepd {sae}" followed by a move of the mask
; into %eax (kmovb on the VLX run, kmovw + movzbl on the NoVLX run).
; NOTE(review): the test name says "oeq", but predicate 2 produces vcmplepd in
; the expected output - presumably a naming leftover; confirm before relying
; on the name.
; NOTE(review): presumably the trailing i32 8 is what requests {sae} - confirm.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovb %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzbl %al, %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
|
|
%3 = zext i8 %2 to i32
|
|
ret i32 %3
|
|
}
|
|
|
|
; Masked form of the {sae} compare test: calls
; @llvm.x86.avx512.mask.cmp.pd.512 on %__a / %__b (viewed as <8 x double>)
; with predicate operand i32 2, mask %__u and a final operand of i32 8, then
; zero-extends the i8 result to i32.
; Both runs expect %edi to be moved into %k1 and used as the write mask of a
; single "vcmplepd {sae}".
; NOTE(review): the test name says "oeq", but predicate 2 produces vcmplepd in
; the expected output - presumably a naming leftover; confirm before relying
; on the name.
; NOTE(review): presumably the trailing i32 8 is what requests {sae} - confirm.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovb %k0, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzbl %al, %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
|
|
%3 = zext i8 %2 to i32
|
|
ret i32 %3
|
|
}
|
|
|
|
|
|
|
|
; Unmasked ordered-equal compare of %__a and %__b (both viewed as
; <8 x double>). The <8 x i1> result is widened to <64 x i1> (shuffle indices
; 8-15 select lanes of the zeroinitializer operand, so only the low 8 bits can
; be set) and returned as an i64.
; Expected codegen: the VLX run uses one register-register vcmpeqpd plus
; kmovq; the NoVLX run (avx512f only) rebuilds the mask lane by lane, spills
; it to the stack and combines two 32-bit loads with shlq/orq.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2066:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2067:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2068:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of %__a (viewed as <8 x double>) against a
; full <8 x i64> vector loaded from %__b and bitcast to <8 x double>. The
; <8 x i1> result is widened to <64 x i1> (shuffle indices 8-15 select lanes
; of the zeroinitializer operand, so only the low 8 bits can be set) and
; returned as an i64.
; Expected codegen: both runs fold the load into vcmpeqpd as a (%rdi) memory
; operand. The VLX run then needs only kmovq; the NoVLX run (avx512f only)
; rebuilds the mask lane by lane, spills it to the stack and combines two
; 32-bit loads with shlq/orq.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2069:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2070:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2071:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Unmasked ordered-equal compare of %__a (viewed as <8 x double>) against one
; double loaded from %__b and splatted to all 8 lanes (insertelement followed
; by an all-zero-index shufflevector). The <8 x i1> result is widened to
; <64 x i1> (shuffle indices 8-15 select lanes of the zeroinitializer operand,
; so only the low 8 bits can be set) and returned as an i64.
; Expected codegen: both runs fold the splat into a (%rdi){1to8} broadcast
; operand of vcmpeqpd. The VLX run then needs only kmovq; the NoVLX run
; (avx512f only) rebuilds the mask lane by lane, spills it to the stack and
; combines two 32-bit loads with shlq/orq.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2072:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2073:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2074:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <8 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%4 = bitcast <64 x i1> %3 to i64
|
|
ret i64 %4
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a and %__b (both viewed as
; <8 x double>). The <8 x i1> result is ANDed with the bits of %__u, widened
; to <64 x i1> (shuffle indices 8-15 select lanes of the zeroinitializer
; operand, so only the low 8 bits can be set) and returned as an i64.
; Expected codegen: both runs move %edi into %k1 and use it as the write mask
; of vcmpeqpd. The VLX run then needs only kmovq; the NoVLX run (avx512f only)
; rebuilds the mask lane by lane, spills it to the stack and combines two
; 32-bit loads with shlq/orq.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2075:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2076:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2077:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a (viewed as <8 x double>) against a
; full <8 x i64> vector loaded from %__b and bitcast to <8 x double>. The
; <8 x i1> result is ANDed with the bits of %__u, widened to <64 x i1>
; (shuffle indices 8-15 select lanes of the zeroinitializer operand, so only
; the low 8 bits can be set) and returned as an i64.
; Expected codegen: both runs move %edi into %k1 and fold the load into a
; masked vcmpeqpd with a (%rsi) memory operand. The VLX run then needs only
; kmovq; the NoVLX run (avx512f only) rebuilds the mask lane by lane, spills
; it to the stack and combines two 32-bit loads with shlq/orq.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2078:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2079:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2080:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load <8 x i64>, <8 x i64>* %__b
|
|
%1 = bitcast <8 x i64> %load to <8 x double>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
; Masked ordered-equal compare of %__a (viewed as <8 x double>) against one
; double loaded from %__b and splatted to all 8 lanes (insertelement followed
; by an all-zero-index shufflevector). The <8 x i1> result is ANDed with the
; bits of %__u, widened to <64 x i1> (shuffle indices 8-15 select lanes of the
; zeroinitializer operand, so only the low 8 bits can be set) and returned as
; an i64.
; Expected codegen: both runs move %edi into %k1 and fold the splat into a
; masked vcmpeqpd with a (%rsi){1to8} broadcast operand. The VLX run then
; needs only kmovq; the NoVLX run (avx512f only) rebuilds the mask lane by
; lane, spills it to the stack and combines two 32-bit loads with shlq/orq.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovq %k0, %rax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: pushq %rbp
|
|
; NoVLX-NEXT: .Lcfi2081:
|
|
; NoVLX-NEXT: .cfi_def_cfa_offset 16
|
|
; NoVLX-NEXT: .Lcfi2082:
|
|
; NoVLX-NEXT: .cfi_offset %rbp, -16
|
|
; NoVLX-NEXT: movq %rsp, %rbp
|
|
; NoVLX-NEXT: .Lcfi2083:
|
|
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
|
|
; NoVLX-NEXT: andq $-32, %rsp
|
|
; NoVLX-NEXT: subq $64, %rsp
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kxorw %k0, %k0, %k1
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
|
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r8d
|
|
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %r9d
|
|
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edx
|
|
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %esi
|
|
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %edi
|
|
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %eax
|
|
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
|
|
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
|
|
; NoVLX-NEXT: kmovw %k1, %ecx
|
|
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
|
|
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
|
|
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, (%rsp)
|
|
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
; NoVLX-NEXT: shlq $32, %rcx
|
|
; NoVLX-NEXT: movl (%rsp), %eax
|
|
; NoVLX-NEXT: orq %rcx, %rax
|
|
; NoVLX-NEXT: movq %rbp, %rsp
|
|
; NoVLX-NEXT: popq %rbp
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%load = load double, double* %__b
|
|
%vec = insertelement <8 x double> undef, double %load, i32 0
|
|
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
|
%2 = fcmp oeq <8 x double> %0, %1
|
|
%3 = bitcast i8 %__u to <8 x i1>
|
|
%4 = and <8 x i1> %2, %3
|
|
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%6 = bitcast <64 x i1> %5 to i64
|
|
ret i64 %6
|
|
}
|
|
|
|
|
|
|
|
; Calls @llvm.x86.avx512.mask.cmp.pd.512 on %__a / %__b (viewed as
; <8 x double>) with predicate operand i32 2, an all-ones mask (i8 -1) and a
; final operand of i32 8, then zero-extends the i8 result to i64.
; Both runs expect a single "vcmplepd {sae}", a mask move into %eax (kmovd on
; the VLX run, kmovw on the NoVLX run) and a movzbl to clear the upper bits.
; NOTE(review): the test name says "oeq", but predicate 2 produces vcmplepd in
; the expected output - presumably a naming leftover; confirm before relying
; on the name.
; NOTE(review): presumably the trailing i32 8 is what requests {sae} - confirm.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: movzbl %al, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzbl %al, %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
|
|
%3 = zext i8 %2 to i64
|
|
ret i64 %3
|
|
}
|
|
|
|
; Masked form of the {sae} compare test with an i64 result: calls
; @llvm.x86.avx512.mask.cmp.pd.512 on %__a / %__b (viewed as <8 x double>)
; with predicate operand i32 2, mask %__u and a final operand of i32 8, then
; zero-extends the i8 result to i64.
; Both runs expect %edi to be moved into %k1 and used as the write mask of a
; single "vcmplepd {sae}", followed by a mask move into %eax and a movzbl.
; NOTE(review): the test name says "oeq", but predicate 2 produces vcmplepd in
; the expected output - presumably a naming leftover; confirm before relying
; on the name.
; NOTE(review): presumably the trailing i32 8 is what requests {sae} - confirm.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
|
|
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
|
|
; VLX: # BB#0: # %entry
|
|
; VLX-NEXT: kmovd %edi, %k1
|
|
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; VLX-NEXT: kmovd %k0, %eax
|
|
; VLX-NEXT: movzbl %al, %eax
|
|
; VLX-NEXT: vzeroupper
|
|
; VLX-NEXT: retq
|
|
;
|
|
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
|
|
; NoVLX: # BB#0: # %entry
|
|
; NoVLX-NEXT: kmovw %edi, %k1
|
|
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
|
|
; NoVLX-NEXT: kmovw %k0, %eax
|
|
; NoVLX-NEXT: movzbl %al, %eax
|
|
; NoVLX-NEXT: vzeroupper
|
|
; NoVLX-NEXT: retq
|
|
entry:
|
|
%0 = bitcast <8 x i64> %__a to <8 x double>
|
|
%1 = bitcast <8 x i64> %__b to <8 x double>
|
|
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
|
|
%3 = zext i8 %2 to i64
|
|
ret i64 %3
|
|
}
|
|
|
|
|
|
|