; Provenance (repository viewer banner): forked from OSchip/llvm-project; LLVM source, 3449 lines, 108 KiB.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86

; mask16: view the i16 argument as <16 x i1>, invert every lane with an
; all-ones xor, and return the lanes viewed as i16 again. The assertions
; expect a plain scalar NOT on both the 64-bit and the i686 runs.
define i16 @mask16(i16 %x) {
; CHECK-LABEL: mask16:
; CHECK: ## %bb.0:
; CHECK-NEXT: notl %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: mask16:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notl %eax
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}

; mask16_zext: same 16-lane inversion as a <16 x i1> xor with all-ones, but the
; i16 result is zero-extended to i32 before being returned.
define i32 @mask16_zext(i16 %x) {
; CHECK-LABEL: mask16_zext:
; CHECK: ## %bb.0:
; CHECK-NEXT: notl %edi
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: mask16_zext:
; X86: ## %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl $65535, %eax ## imm = 0xFFFF
; X86-NEXT: retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <16 x i1> %m1 to i16
  %ret = zext i16 %m2 to i32
  ret i32 %ret
}

; mask8: view the i8 argument as <8 x i1>, invert every lane with an all-ones
; xor, and return the lanes viewed as i8 again.
define i8 @mask8(i8 %x) {
; CHECK-LABEL: mask8:
; CHECK: ## %bb.0:
; CHECK-NEXT: notb %dil
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: mask8:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: notb %al
; X86-NEXT: retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}

; mask8_zext: same 8-lane inversion as mask8's <8 x i1> xor with all-ones, but
; the i8 result is zero-extended to i32 before being returned.
define i32 @mask8_zext(i8 %x) {
; CHECK-LABEL: mask8_zext:
; CHECK: ## %bb.0:
; CHECK-NEXT: notb %dil
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: mask8_zext:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: notb %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <8 x i1> %m1 to i8
  %ret = zext i8 %m2 to i32
  ret i32 %ret
}

; mask16_mem: load an i16 from %ptr, invert it as a <16 x i1> mask, and store
; the result back to the same address. The assertions expect the work to be
; done in a mask register (kmovw / knotw / kmovw) on every run.
define void @mask16_mem(i16* %ptr) {
; CHECK-LABEL: mask16_mem:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw (%rdi), %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, (%rdi)
; CHECK-NEXT: retq
;
; X86-LABEL: mask16_mem:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw (%eax), %k0
; X86-NEXT: knotw %k0, %k0
; X86-NEXT: kmovw %k0, (%eax)
; X86-NEXT: retl
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}

; mask8_mem: load an i8 from %ptr, invert it as an <8 x i1> mask, and store the
; result back to the same address. The runs whose -mattr list has no avx512dq
; are expected to use a memory-operand notb; the runs with avx512dq are expected
; to go through a mask register with kmovb / knotb.
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## %bb.0:
; KNL-NEXT: notb (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_mem:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask8_mem:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: notb (%rdi)
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8_mem:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovb (%rdi), %k0
; AVX512DQ-NEXT: knotb %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: retq
;
; X86-LABEL: mask8_mem:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovb (%eax), %k0
; X86-NEXT: knotb %k0, %k0
; X86-NEXT: kmovb %k0, (%eax)
; X86-NEXT: retl
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}

; mand16: view both i16 arguments as <16 x i1> and return
; (x & y) | (x ^ y), viewed as i16 again. The assertions expect the whole
; computation to stay in general-purpose registers.
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: orl %eax, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: mand16:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl %ecx, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; mand16_mem: same (a & b) | (a ^ b) computation as a <16 x i1> expression, but
; both operands are loaded from memory as <16 x i1> values. The assertions
; expect the logic to be carried out with kandw / kxorw / korw.
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; KNL-LABEL: mand16_mem:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw (%rdi), %k0
; KNL-NEXT: kmovw (%rsi), %k1
; KNL-NEXT: kandw %k1, %k0, %k2
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: mand16_mem:
; SKX: ## %bb.0:
; SKX-NEXT: kmovw (%rdi), %k0
; SKX-NEXT: kmovw (%rsi), %k1
; SKX-NEXT: kandw %k1, %k0, %k2
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: korw %k0, %k2, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mand16_mem:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovw (%rdi), %k0
; AVX512BW-NEXT: kmovw (%rsi), %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k2
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: korw %k0, %k2, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mand16_mem:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw (%rdi), %k0
; AVX512DQ-NEXT: kmovw (%rsi), %k1
; AVX512DQ-NEXT: kandw %k1, %k0, %k2
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: korw %k0, %k2, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: retq
;
; X86-LABEL: mand16_mem:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw (%ecx), %k0
; X86-NEXT: kmovw (%eax), %k1
; X86-NEXT: kandw %k1, %k0, %k2
; X86-NEXT: kxorw %k1, %k0, %k0
; X86-NEXT: korw %k0, %k2, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
  %ma = load <16 x i1>, <16 x i1>* %x
  %mb = load <16 x i1>, <16 x i1>* %y
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; shuf_test1: view the i16 argument as <16 x i1>, pick lanes 8..15 with a
; shufflevector, and return those eight lanes as an i8. The 64-bit runs are
; expected to use a mask-register right shift by 8; the i686 run is expected to
; load a single byte from the stack instead.
define i8 @shuf_test1(i16 %v) nounwind {
; KNL-LABEL: shuf_test1:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def $al killed $al killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: shuf_test1:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kshiftrw $8, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: shuf_test1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kshiftrw $8, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuf_test1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
;
; X86-LABEL: shuf_test1:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
  %v1 = bitcast i16 %v to <16 x i1>
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}

; zext_test1: unsigned-greater-than compare of two <16 x i32> vectors, extract
; lane 5 of the <16 x i1> result, and zero-extend that bit to i32.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test1:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test1:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrw $5, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: zext_test1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: zext_test1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: zext_test1:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT: kshiftrw $5, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; zext_test2: unsigned-greater-than compare of two <16 x i32> vectors, extract
; lane 5 of the <16 x i1> result, and zero-extend that bit to i16.
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test2:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test2:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrw $5, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: zext_test2:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: zext_test2:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: zext_test2:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT: kshiftrw $5, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; zext_test3: unsigned-greater-than compare of two <16 x i32> vectors, extract
; lane 5 of the <16 x i1> result, and zero-extend that bit to i8.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test3:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: ## kill: def $al killed $al killed $eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: zext_test3:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrw $5, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $1, %al
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: zext_test3:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andb $1, %al
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: zext_test3:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: andb $1, %al
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: zext_test3:
; X86: ## %bb.0:
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT: kshiftrw $5, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}

; conv1: store an all-true <8 x i1> through %R, then round-trip the constant
; <0,1,1,1,1,1,1,1> through a stack slot and return it as an i8. The assertions
; expect both stores to become byte immediates (-1 and -2) and the return value
; to be the immediate -2.
define i8 @conv1(<8 x i1>* %R) {
; CHECK-LABEL: conv1:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movb $-1, (%rdi)
; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movb $-2, %al
; CHECK-NEXT: retq
;
; X86-LABEL: conv1:
; X86: ## %bb.0: ## %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $-1, (%eax)
; X86-NEXT: movb $-2, (%esp)
; X86-NEXT: movb $-2, %al
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}

; test4: two signed-greater-than compares on <4 x i64> inputs, then a signed
; greater-than compare of the two <4 x i1> results, sign-extended to <4 x i32>.
; The assertions expect the second vector compare to be executed under the mask
; produced by the first one.
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; KNL-LABEL: test4:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test4:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test4:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test4:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test4:
; X86: ## %bb.0:
; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k1
; X86-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; X86-NEXT: vpmovm2d %k0, %xmm0
; X86-NEXT: vzeroupper
; X86-NEXT: retl
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1>%res to <4 x i32>
  ret <4 x i32> %resse
}

; test5: a signed-less-than and a signed-greater-than compare on <2 x i64>
; inputs, then a signed less-than compare of the two <2 x i1> results,
; sign-extended to <2 x i64>.
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
; KNL-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpleq %zmm3, %zmm2, %k1
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test5:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test5:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpcmpleq %zmm3, %zmm2, %k1
; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test5:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: ## kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512DQ-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpcmpleq %zmm3, %zmm2, %k1
; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test5:
; X86: ## %bb.0:
; X86-NEXT: vpcmpleq %xmm3, %xmm2, %k1
; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT: vpmovm2q %k0, %xmm0
; X86-NEXT: retl
  ; Despite its name, %x_gt_y holds the result of x < y (icmp slt).
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1>%res to <2 x i64>
  ret <2 x i64> %resse
}

; test6: AND the <16 x i1> argument with an alternating true/false constant,
; view the result as i16, and branch on whether it is zero. Both successors
; simply return.
;
; This function carries no FileCheck assertions. Its "define" used to share a
; line with the closing brace of the previous function; it now starts on its
; own line. Assertions still have to be regenerated with
; utils/update_llc_test_checks.py (review: not run here).
define void @test6(<16 x i1> %mask) {
allocas:
  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}

; test7: OR the <8 x i1> argument with an alternating true/false constant, view
; the result as i8, and branch on whether it is zero. Both successors simply
; return. The assertions expect the OR to be done on the scalar byte with the
; immediate 85.
define void @test7(<8 x i1> %mask) {
; KNL-LABEL: test7:
; KNL: ## %bb.0: ## %allocas
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: orb $85, %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test7:
; SKX: ## %bb.0: ## %allocas
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: orb $85, %al
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test7:
; AVX512BW: ## %bb.0: ## %allocas
; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: orb $85, %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test7:
; AVX512DQ: ## %bb.0: ## %allocas
; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: orb $85, %al
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test7:
; X86: ## %bb.0: ## %allocas
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
; X86-NEXT: vpmovw2m %xmm0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: orb $85, %al
; X86-NEXT: retl
allocas:
  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}

; test8: select between two <16 x i1> compare results using the scalar
; condition %a1 > %b1 (signed), then sign-extend the chosen mask to <16 x i8>.
; %cmp2 is an unsigned less-than-zero compare, which can never be true; the
; assertions accordingly expect the not-taken path to zero the mask with kxorw.
define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL: ## %bb.0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB17_1
; KNL-NEXT: ## %bb.2:
; KNL-NEXT: kxorw %k0, %k0, %k1
; KNL-NEXT: jmp LBB17_3
; KNL-NEXT: LBB17_1:
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; KNL-NEXT: LBB17_3:
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test8:
; SKX: ## %bb.0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB17_1
; SKX-NEXT: ## %bb.2:
; SKX-NEXT: kxorw %k0, %k0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB17_1:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test8:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: jg LBB17_1
; AVX512BW-NEXT: ## %bb.2:
; AVX512BW-NEXT: kxorw %k0, %k0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB17_1:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test8:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: jg LBB17_1
; AVX512DQ-NEXT: ## %bb.2:
; AVX512DQ-NEXT: kxorw %k0, %k0, %k0
; AVX512DQ-NEXT: jmp LBB17_3
; AVX512DQ-NEXT: LBB17_1:
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: LBB17_3:
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test8:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jg LBB17_1
; X86-NEXT: ## %bb.2:
; X86-NEXT: kxorw %k0, %k0, %k0
; X86-NEXT: vpmovm2b %k0, %xmm0
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB17_1:
; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; X86-NEXT: vpmovm2b %k0, %xmm0
; X86-NEXT: vzeroupper
; X86-NEXT: retl
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}

; test9: return either %a or %b (both <16 x i1>) depending on the scalar
; condition %a1 > %b1 (signed).
define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL: ## %bb.0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB18_1
; KNL-NEXT: ## %bb.2:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
; KNL-NEXT: jmp LBB18_3
; KNL-NEXT: LBB18_1:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: LBB18_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
; SKX: ## %bb.0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB18_1
; SKX-NEXT: ## %bb.2:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
; SKX-NEXT: jmp LBB18_3
; SKX-NEXT: LBB18_1:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: LBB18_3:
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test9:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: jg LBB18_1
; AVX512BW-NEXT: ## %bb.2:
; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
; AVX512BW-NEXT: jmp LBB18_3
; AVX512BW-NEXT: LBB18_1:
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT: LBB18_3:
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test9:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: jg LBB18_1
; AVX512DQ-NEXT: ## %bb.2:
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
; AVX512DQ-NEXT: jmp LBB18_3
; AVX512DQ-NEXT: LBB18_1:
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: LBB18_3:
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test9:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jg LBB18_1
; X86-NEXT: ## %bb.2:
; X86-NEXT: vpsllw $7, %xmm1, %xmm0
; X86-NEXT: jmp LBB18_3
; X86-NEXT: LBB18_1:
; X86-NEXT: vpsllw $7, %xmm0, %xmm0
; X86-NEXT: LBB18_3:
; X86-NEXT: vpmovb2m %xmm0, %k0
; X86-NEXT: vpmovm2b %k0, %xmm0
; X86-NEXT: retl
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
  ret <16 x i1>%c
}

; test10: return either %a or %b (both <8 x i1>) depending on the scalar
; condition %a1 > %b1 (signed).
;
; This function carries no FileCheck assertions. Its "define" used to share a
; line with the closing brace of the previous function; it now starts on its
; own line. Assertions still have to be regenerated with
; utils/update_llc_test_checks.py (review: not run here).
define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
  ret <8 x i1>%c
}

; test11: return either %a or %b (both <4 x i1>) depending on the scalar
; condition %a1 > %b1 (signed).
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL: ## %bb.0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB20_1
; KNL-NEXT: ## %bb.2:
; KNL-NEXT: vpslld $31, %xmm1, %xmm0
; KNL-NEXT: jmp LBB20_3
; KNL-NEXT: LBB20_1:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: LBB20_3:
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
; SKX: ## %bb.0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB20_1
; SKX-NEXT: ## %bb.2:
; SKX-NEXT: vpslld $31, %xmm1, %xmm0
; SKX-NEXT: jmp LBB20_3
; SKX-NEXT: LBB20_1:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: LBB20_3:
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test11:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: jg LBB20_1
; AVX512BW-NEXT: ## %bb.2:
; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
; AVX512BW-NEXT: jmp LBB20_3
; AVX512BW-NEXT: LBB20_1:
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT: LBB20_3:
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test11:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: jg LBB20_1
; AVX512DQ-NEXT: ## %bb.2:
; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0
; AVX512DQ-NEXT: jmp LBB20_3
; AVX512DQ-NEXT: LBB20_1:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: LBB20_3:
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test11:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: jg LBB20_1
; X86-NEXT: ## %bb.2:
; X86-NEXT: vpslld $31, %xmm1, %xmm0
; X86-NEXT: jmp LBB20_3
; X86-NEXT: LBB20_1:
; X86-NEXT: vpslld $31, %xmm0, %xmm0
; X86-NEXT: LBB20_3:
; X86-NEXT: vpmovd2m %xmm0, %k0
; X86-NEXT: vpmovm2d %k0, %xmm0
; X86-NEXT: retl
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
  ret <4 x i1>%c
}

; test12: view the constant 21845 (0x5555) as <16 x i1>, extract lane 0, and
; use it to select between %x and %y. The assertions expect the select to fold
; away and the first argument to be returned.
define i32 @test12(i32 %x, i32 %y) {
; CHECK-LABEL: test12:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: test12:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; test13: view the constant 21845 (0x5555) as <16 x i1>, extract lane 3, and
; use it to select between %x and %y. The assertions expect the select to fold
; away and the second argument to be returned on the 64-bit runs.
define i32 @test13(i32 %x, i32 %y) {
; CHECK-LABEL: test13:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: test13:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Make sure we don't crash on a large vector.
; test13_crash: view an i128 constant as <128 x i1>, extract lane 3, and use it
; to select between %x and %y. The assertions expect the select to fold away
; and the first argument to be returned on the 64-bit runs.
define i32 @test13_crash(i32 %x, i32 %y) {
; CHECK-LABEL: test13_crash:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: test13_crash:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
  %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
  %b = extractelement <128 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Element 2 of the constant mask 21845 (0x5555) is set; inserting it at index 1
; of <1,0,0,1> is expected to constant-fold to the vector <1,1,0,1>.
define <4 x i1> @test14() {
; CHECK-LABEL: test14:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
; CHECK-NEXT: retq
;
; X86-LABEL: test14:
; X86: ## %bb.0:
; X86-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,0,1]
; X86-NEXT: retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}
|
|
|
|
; Select between two constant <16 x i1> masks (0x5555 and 0x1) on a signed
; scalar compare. The expected code picks the integer image of the mask with a
; cmov, moves it into a mask register and expands it to a byte vector.
define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-LABEL: test15:
; KNL: ## %bb.0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: movl $21845, %eax ## imm = 0x5555
; KNL-NEXT: movl $1, %ecx
; KNL-NEXT: cmovgl %eax, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test15:
; SKX: ## %bb.0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: movl $21845, %eax ## imm = 0x5555
; SKX-NEXT: movl $1, %ecx
; SKX-NEXT: cmovgl %eax, %ecx
; SKX-NEXT: kmovd %ecx, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test15:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555
; AVX512BW-NEXT: movl $1, %ecx
; AVX512BW-NEXT: cmovgl %eax, %ecx
; AVX512BW-NEXT: kmovd %ecx, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test15:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555
; AVX512DQ-NEXT: movl $1, %ecx
; AVX512DQ-NEXT: cmovgl %eax, %ecx
; AVX512DQ-NEXT: kmovw %ecx, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test15:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $21845, %eax ## imm = 0x5555
; X86-NEXT: movl $1, %ecx
; X86-NEXT: cmovgl %eax, %ecx
; X86-NEXT: kmovd %ecx, %k0
; X86-NEXT: vpmovm2b %k0, %xmm0
; X86-NEXT: retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}
|
|
|
|
; Insert a constant true into element 5 of a <64 x i1> built from an i64, then
; sign-extend the mask to bytes. Configurations with AVX512BW keep the mask in
; one 64-bit k-register; the others are expected to split it into four 16-bit
; pieces and patch only the lowest one.
define <64 x i8> @test16(i64 %x) {
;
; KNL-LABEL: test16:
; KNL: ## %bb.0:
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: movl %edi, %ecx
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: shrq $48, %rax
; KNL-NEXT: shrl $16, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: movb $1, %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftrw $5, %k0, %k5
; KNL-NEXT: kxorw %k4, %k5, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k4
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## %bb.0:
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: movb $1, %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftrq $5, %k0, %k2
; SKX-NEXT: kxorq %k1, %k2, %k1
; SKX-NEXT: kshiftlq $63, %k1, %k1
; SKX-NEXT: kshiftrq $58, %k1, %k1
; SKX-NEXT: kxorq %k1, %k0, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test16:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovq %rdi, %k0
; AVX512BW-NEXT: movb $1, %al
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
; AVX512BW-NEXT: kxorq %k1, %k2, %k1
; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
; AVX512BW-NEXT: kxorq %k1, %k0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test16:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: movq %rdi, %rax
; AVX512DQ-NEXT: movl %edi, %ecx
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: shrq $32, %rdi
; AVX512DQ-NEXT: shrq $48, %rax
; AVX512DQ-NEXT: shrl $16, %ecx
; AVX512DQ-NEXT: kmovw %ecx, %k1
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kmovw %edi, %k3
; AVX512DQ-NEXT: movb $1, %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5
; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test16:
; X86: ## %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
; X86-NEXT: movb $1, %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: kshiftrq $5, %k0, %k2
; X86-NEXT: kxorq %k1, %k2, %k1
; X86-NEXT: kshiftlq $63, %k1, %k1
; X86-NEXT: kshiftrq $58, %k1, %k1
; X86-NEXT: kxorq %k1, %k0, %k0
; X86-NEXT: vpmovm2b %k0, %zmm0
; X86-NEXT: retl
  %a = bitcast i64 %x to <64 x i1>
  %b = insertelement <64 x i1>%a, i1 true, i32 5
  %c = sext <64 x i1>%b to <64 x i8>
  ret <64 x i8>%c
}
|
|
|
|
; Same shape as an insert of a constant into element 5 of a <64 x i1>, but the
; inserted bit is the run-time result of `icmp sgt %y, %z` (materialised with
; setg in the expected code) before the mask is sign-extended to bytes.
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
;
; KNL-LABEL: test17:
; KNL: ## %bb.0:
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: movl %edi, %ecx
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: shrq $32, %rdi
; KNL-NEXT: shrq $48, %rax
; KNL-NEXT: shrl $16, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: cmpl %edx, %esi
; KNL-NEXT: setg %al
; KNL-NEXT: kshiftrw $5, %k0, %k4
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k4
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## %bb.0:
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: cmpl %edx, %esi
; SKX-NEXT: setg %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftrq $5, %k0, %k2
; SKX-NEXT: kxorq %k1, %k2, %k1
; SKX-NEXT: kshiftlq $63, %k1, %k1
; SKX-NEXT: kshiftrq $58, %k1, %k1
; SKX-NEXT: kxorq %k1, %k0, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test17:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovq %rdi, %k0
; AVX512BW-NEXT: cmpl %edx, %esi
; AVX512BW-NEXT: setg %al
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
; AVX512BW-NEXT: kxorq %k1, %k2, %k1
; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
; AVX512BW-NEXT: kxorq %k1, %k0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test17:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: movq %rdi, %rax
; AVX512DQ-NEXT: movl %edi, %ecx
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: shrq $32, %rdi
; AVX512DQ-NEXT: shrq $48, %rax
; AVX512DQ-NEXT: shrl $16, %ecx
; AVX512DQ-NEXT: kmovw %ecx, %k1
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kmovw %edi, %k3
; AVX512DQ-NEXT: cmpl %edx, %esi
; AVX512DQ-NEXT: setg %al
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test17:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setg %al
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: kshiftrq $5, %k0, %k2
; X86-NEXT: kxorq %k1, %k2, %k1
; X86-NEXT: kshiftlq $63, %k1, %k1
; X86-NEXT: kshiftrq $58, %k1, %k1
; X86-NEXT: kxorq %k1, %k0, %k0
; X86-NEXT: vpmovm2b %k0, %zmm0
; X86-NEXT: retl
  %a = bitcast i64 %x to <64 x i1>
  %b = icmp sgt i32 %y, %z
  %c = insertelement <64 x i1>%a, i1 %b, i32 5
  %d = sext <64 x i1>%c to <64 x i8>
  ret <64 x i8>%d
}
|
|
|
|
; Move two mask bits between vectors of different width: elements 8 and 9 of
; the <16 x i1> image of %y are inserted into elements 7 and 6, respectively,
; of the <8 x i1> image of %a.
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %esi, %k2
; KNL-NEXT: kshiftrw $8, %k2, %k0
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: kshiftrw $6, %k1, %k3
; KNL-NEXT: kxorw %k2, %k3, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $9, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kshiftlw $7, %k0, %k0
; KNL-NEXT: korw %k0, %k1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test18:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: kmovd %esi, %k2
; SKX-NEXT: kshiftrw $8, %k2, %k0
; SKX-NEXT: kshiftrw $9, %k2, %k2
; SKX-NEXT: kshiftrb $6, %k1, %k3
; SKX-NEXT: kxorb %k2, %k3, %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftrb $1, %k2, %k2
; SKX-NEXT: kxorb %k2, %k1, %k1
; SKX-NEXT: kshiftlb $1, %k1, %k1
; SKX-NEXT: kshiftrb $1, %k1, %k1
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: korb %k0, %k1, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test18:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: kmovd %esi, %k2
; AVX512BW-NEXT: kshiftrw $8, %k2, %k0
; AVX512BW-NEXT: kshiftrw $9, %k2, %k2
; AVX512BW-NEXT: kshiftrw $6, %k1, %k3
; AVX512BW-NEXT: kxorw %k2, %k3, %k2
; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
; AVX512BW-NEXT: kshiftrw $9, %k2, %k2
; AVX512BW-NEXT: kxorw %k2, %k1, %k1
; AVX512BW-NEXT: kshiftlw $9, %k1, %k1
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kshiftlw $7, %k0, %k0
; AVX512BW-NEXT: korw %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test18:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: kmovw %esi, %k2
; AVX512DQ-NEXT: kshiftrw $8, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2
; AVX512DQ-NEXT: kshiftrb $6, %k1, %k3
; AVX512DQ-NEXT: kxorb %k2, %k3, %k2
; AVX512DQ-NEXT: kshiftlb $7, %k2, %k2
; AVX512DQ-NEXT: kshiftrb $1, %k2, %k2
; AVX512DQ-NEXT: kxorb %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
; AVX512DQ-NEXT: korb %k0, %k1, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test18:
; X86: ## %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: kshiftrw $9, %k1, %k2
; X86-NEXT: kshiftrw $8, %k1, %k1
; X86-NEXT: kshiftlb $7, %k1, %k1
; X86-NEXT: kshiftrb $6, %k0, %k3
; X86-NEXT: kxorb %k2, %k3, %k2
; X86-NEXT: kshiftlb $7, %k2, %k2
; X86-NEXT: kshiftrb $1, %k2, %k2
; X86-NEXT: kxorb %k2, %k0, %k0
; X86-NEXT: kshiftlb $1, %k0, %k0
; X86-NEXT: kshiftrb $1, %k0, %k0
; X86-NEXT: korb %k1, %k0, %k0
; X86-NEXT: vpmovm2w %k0, %xmm0
; X86-NEXT: retl
  %b = bitcast i8 %a to <8 x i1>
  %b1 = bitcast i16 %y to <16 x i1>
  %el1 = extractelement <16 x i1>%b1, i32 8
  %el2 = extractelement <16 x i1>%b1, i32 9
  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
  ret <8 x i1>%d
}
|
|
; Zeroing select of a <32 x i16> value under a <32 x i1> mask passed as a
; vector argument. With AVX512BW the expected code is a zero-masked vmovdqu16;
; without it the mask is widened to words and applied with shifts and vpand on
; the two 256-bit halves.
define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL: ## %bb.0:
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0
; KNL-NEXT: vpsllw $15, %ymm3, %ymm2
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: test21:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
; SKX-NEXT: vpmovb2m %ymm1, %k1
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test21:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $7, %ymm1, %ymm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test21:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm2
; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2
; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test21:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %ymm1, %ymm1
; X86-NEXT: vpmovb2m %ymm1, %k1
; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
|
|
|
|
; Store a <4 x i1> argument to memory. Every configuration is expected to
; convert the vector to a mask and write it as a single byte (kmovb where
; AVX512DQ is available, otherwise via a GPR and movb).
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; KNL-LABEL: test22:
; KNL: ## %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test22:
; SKX: ## %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test22:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test22:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test22:
; X86: ## %bb.0:
; X86-NEXT: vpslld $31, %xmm0, %xmm0
; X86-NEXT: vpmovd2m %xmm0, %k0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovb %k0, (%eax)
; X86-NEXT: retl
  store <4 x i1> %a, <4 x i1>* %addr
  ret void
}
|
|
|
|
; Store a <2 x i1> argument to memory. Every configuration is expected to
; convert the vector to a mask and write it as a single byte (kmovb where
; AVX512DQ is available, otherwise via a GPR and movb).
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-LABEL: test23:
; KNL: ## %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test23:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vpmovq2m %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test23:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test23:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: test23:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
; X86-NEXT: vpmovq2m %xmm0, %k0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovb %k0, (%eax)
; X86-NEXT: retl
  store <2 x i1> %a, <2 x i1>* %addr
  ret void
}
|
|
|
|
; Invert a <1 x i1> argument (xor with true) and store it. The expected code
; builds an all-ones mask with kxnorw, xors it in, and writes one byte.
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
; KNL-LABEL: store_v1i1:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kxnorw %k0, %k0, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rsi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v1i1:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovb %k0, (%rsi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v1i1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rsi)
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v1i1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kxnorw %k0, %k0, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rsi)
; AVX512DQ-NEXT: retq
;
; X86-LABEL: store_v1i1:
; X86: ## %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kxnorw %k0, %k0, %k1
; X86-NEXT: kxorw %k1, %k0, %k0
; X86-NEXT: kmovb %k0, (%eax)
; X86-NEXT: retl
  %x = xor <1 x i1> %c, <i1 1>
  store <1 x i1> %x, <1 x i1>* %ptr, align 4
  ret void
}
|
|
|
|
; Invert a <2 x i1> argument (xor with all-ones) and store it. The expected
; code converts the vector to a mask, applies knotw, and writes one byte.
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL: ## %bb.0:
; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v2i1:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vpmovq2m %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v2i1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v2i1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: knotw %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: store_v2i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $63, %xmm0, %xmm0
; X86-NEXT: vpmovq2m %xmm0, %k0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: knotw %k0, %k0
; X86-NEXT: kmovb %k0, (%eax)
; X86-NEXT: retl
  %x = xor <2 x i1> %c, <i1 1, i1 1>
  store <2 x i1> %x, <2 x i1>* %ptr, align 4
  ret void
}
|
|
|
|
; Invert a <4 x i1> argument (xor with all-ones) and store it. The expected
; code converts the vector to a mask, applies knotw, and writes one byte.
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL: ## %bb.0:
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v4i1:
; SKX: ## %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v4i1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v4i1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: knotw %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: store_v4i1:
; X86: ## %bb.0:
; X86-NEXT: vpslld $31, %xmm0, %xmm0
; X86-NEXT: vpmovd2m %xmm0, %k0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: knotw %k0, %k0
; X86-NEXT: kmovb %k0, (%eax)
; X86-NEXT: retl
  %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
  store <4 x i1> %x, <4 x i1>* %ptr, align 4
  ret void
}
|
|
|
|
; Invert an <8 x i1> argument (xor with all-ones) and store it as one byte.
; Configurations with AVX512DQ are expected to use the byte-wide knotb; the
; others use knotw.
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v8i1:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v8i1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v8i1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT: knotb %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: store_v8i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
; X86-NEXT: vpmovw2m %xmm0, %k0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: knotb %k0, %k0
; X86-NEXT: kmovb %k0, (%eax)
; X86-NEXT: retl
  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <8 x i1> %x, <8 x i1>* %ptr, align 4
  ret void
}
|
|
|
|
; Invert a <16 x i1> argument (xor with all-ones) and store it. The mask fills
; a full 16-bit k-register, so every configuration is expected to store it
; directly with kmovw after knotw.
define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
; KNL-LABEL: store_v16i1:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: store_v16i1:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovw %k0, (%rdi)
; SKX-NEXT: retq
;
; AVX512BW-LABEL: store_v16i1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: knotw %k0, %k0
; AVX512BW-NEXT: kmovw %k0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: store_v16i1:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: knotw %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; X86-LABEL: store_v16i1:
; X86: ## %bb.0:
; X86-NEXT: vpsllw $7, %xmm0, %xmm0
; X86-NEXT: vpmovb2m %xmm0, %k0
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: knotw %k0, %k0
; X86-NEXT: kmovw %k0, (%eax)
; X86-NEXT: retl
  %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <16 x i1> %x, <16 x i1>* %ptr, align 4
  ret void
}
|
|
|
|
;void f2(int);
;void f1(int c)
;{
;  static int v = 0;
;  if (v == 0)
;    v = 1;
;  else
;    v = 0;
;  f2(v);
;}
|
|
|
|
; i1 global read and written by @f1. It appears to stand in for the
; `static int v` of the commented C snippet (presumably narrowed to a boolean
; before this IR was captured -- TODO confirm).
@f1.v = internal unnamed_addr global i1 false, align 4
|
|
|
|
; Toggles the i1 global @f1.v and passes the new value, zero-extended to i32,
; to @f2. The expected code is purely scalar (byte load, `xorl $1`, byte
; store); no mask-register instruction appears in any check line. The 64-bit
; runs expect a tail-call jump to _f2, while the 32-bit run expects a regular
; call with the argument placed on the stack. The %c parameter is unused.
define void @f1(i32 %c) {
|
|
; CHECK-LABEL: f1:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; CHECK-NEXT: movzbl {{.*}}(%rip), %edi
|
|
; CHECK-NEXT: xorl $1, %edi
|
|
; CHECK-NEXT: movb %dil, {{.*}}(%rip)
|
|
; CHECK-NEXT: jmp _f2 ## TAILCALL
|
|
;
|
|
; X86-LABEL: f1:
|
|
; X86: ## %bb.0: ## %entry
|
|
; X86-NEXT: subl $12, %esp
|
|
; X86-NEXT: .cfi_def_cfa_offset 16
|
|
; X86-NEXT: movzbl _f1.v, %eax
|
|
; X86-NEXT: xorl $1, %eax
|
|
; X86-NEXT: movb %al, _f1.v
|
|
; X86-NEXT: movl %eax, (%esp)
|
|
; X86-NEXT: calll _f2
|
|
; X86-NEXT: addl $12, %esp
|
|
; X86-NEXT: retl
|
|
entry:
|
|
%.b1 = load i1, i1* @f1.v, align 4
|
|
%not..b1 = xor i1 %.b1, true
|
|
store i1 %not..b1, i1* @f1.v, align 4
|
|
%0 = zext i1 %not..b1 to i32
|
|
tail call void @f2(i32 %0) #2
|
|
ret void
|
|
}
|
|
|
|
; External callee invoked by @f1; only this declaration is visible here.
declare void @f2(i32) #1
|
|
|
|
; Truncates an i16 argument to i1 and stores it through %y. The expected
; lowering keeps only bit 0 with `andl $1` and writes the result with a plain
; byte store; no mask register is involved.
define void @store_i16_i1(i16 %x, i1 *%y) {
|
|
; CHECK-LABEL: store_i16_i1:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: andl $1, %edi
|
|
; CHECK-NEXT: movb %dil, (%rsi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_i16_i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
|
; X86-NEXT: andl $1, %ecx
|
|
; X86-NEXT: movb %cl, (%eax)
|
|
; X86-NEXT: retl
|
|
%c = trunc i16 %x to i1
|
|
store i1 %c, i1* %y
|
|
ret void
|
|
}
|
|
|
|
; Truncates an i8 argument to i1 and stores it through %y. The expected
; lowering keeps only bit 0 (`andl $1` on the 64-bit runs, `andb $1` on the
; 32-bit run) and writes the result with a plain byte store.
define void @store_i8_i1(i8 %x, i1 *%y) {
|
|
; CHECK-LABEL: store_i8_i1:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: andl $1, %edi
|
|
; CHECK-NEXT: movb %dil, (%rsi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_i8_i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
|
; X86-NEXT: andb $1, %cl
|
|
; X86-NEXT: movb %cl, (%eax)
|
|
; X86-NEXT: retl
|
|
%c = trunc i8 %x to i1
|
|
store i1 %c, i1* %y
|
|
ret void
|
|
}
|
|
|
|
; Selects between %x and zero using a constant <32 x i1> mask. The KNL and
; AVX512DQ runs expect the select to become two 256-bit vandps operations
; against constant-pool operands. The SKX, AVX512BW and 32-bit runs expect
; the mask to be materialized as the immediate 0x59455495, moved into %k1 and
; applied through a zero-masking vmovdqu16.
define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
|
|
; KNL-LABEL: test_build_vec_v32i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_build_vec_v32i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
|
; SKX-NEXT: kmovd %eax, %k1
|
|
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_build_vec_v32i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
|
; AVX512BW-NEXT: kmovd %eax, %k1
|
|
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_build_vec_v32i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_build_vec_v32i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl $1497715861, %eax ## imm = 0x59455495
|
|
; X86-NEXT: kmovd %eax, %k1
|
|
; X86-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
|
; X86-NEXT: retl
|
|
%ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %ret
|
|
}
|
|
|
|
; Selects between %x and zero using a constant <64 x i1> mask. The KNL and
; AVX512DQ runs expect two 256-bit vandps operations against constant-pool
; operands. The SKX, AVX512BW and 32-bit runs expect a single 512-bit vpshufb
; whose shuffle control zeroes the deselected bytes, with no mask register
; used at all.
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
|
|
; KNL-LABEL: test_build_vec_v64i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_build_vec_v64i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_build_vec_v64i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_build_vec_v64i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_build_vec_v64i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
|
|
; X86-NEXT: retl
|
|
%ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
|
|
ret <64 x i8> %ret
|
|
}
|
|
|
|
; Branches on whether an <8 x i1> mask is all zero. The mask is the AND of
; two compares: %in > %val1 (loaded from %base) and %in < %val3, where %val3
; is %val2 (loaded one double, i.e. 8 bytes, past %base) in the lanes where
; the first compare holds and zero elsewhere. A zero mask takes the L2 path
; (store %in at the second address); otherwise L1 stores %in at %base.
; In all runs the AND is expected to fold into the write-mask of the second
; vcmpltpd. The SKX, AVX512DQ and 32-bit runs then test the mask directly
; with kortestb, while the KNL and AVX512BW runs copy it to a general
; register and use testb.
define void @ktest_1(<8 x double> %in, double * %base) {
|
|
; KNL-LABEL: ktest_1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vmovupd (%rdi), %zmm1
|
|
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
|
; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
|
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: testb %al, %al
|
|
; KNL-NEXT: je LBB42_2
|
|
; KNL-NEXT: ## %bb.1: ## %L1
|
|
; KNL-NEXT: vmovapd %zmm0, (%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
; KNL-NEXT: LBB42_2: ## %L2
|
|
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: ktest_1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vmovupd (%rdi), %zmm1
|
|
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
|
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
|
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
|
; SKX-NEXT: kortestb %k0, %k0
|
|
; SKX-NEXT: je LBB42_2
|
|
; SKX-NEXT: ## %bb.1: ## %L1
|
|
; SKX-NEXT: vmovapd %zmm0, (%rdi)
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
; SKX-NEXT: LBB42_2: ## %L2
|
|
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: ktest_1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vmovupd (%rdi), %zmm1
|
|
; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
|
; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
|
; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: testb %al, %al
|
|
; AVX512BW-NEXT: je LBB42_2
|
|
; AVX512BW-NEXT: ## %bb.1: ## %L1
|
|
; AVX512BW-NEXT: vmovapd %zmm0, (%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
; AVX512BW-NEXT: LBB42_2: ## %L2
|
|
; AVX512BW-NEXT: vmovapd %zmm0, 8(%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: ktest_1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vmovupd (%rdi), %zmm1
|
|
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
|
; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
|
|
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
|
; AVX512DQ-NEXT: kortestb %k0, %k0
|
|
; AVX512DQ-NEXT: je LBB42_2
|
|
; AVX512DQ-NEXT: ## %bb.1: ## %L1
|
|
; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
; AVX512DQ-NEXT: LBB42_2: ## %L2
|
|
; AVX512DQ-NEXT: vmovapd %zmm0, 8(%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: ktest_1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: vmovupd (%eax), %zmm1
|
|
; X86-NEXT: vcmpltpd %zmm0, %zmm1, %k1
|
|
; X86-NEXT: vmovupd 8(%eax), %zmm1 {%k1} {z}
|
|
; X86-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
|
|
; X86-NEXT: kortestb %k0, %k0
|
|
; X86-NEXT: je LBB42_2
|
|
; X86-NEXT: ## %bb.1: ## %L1
|
|
; X86-NEXT: vmovapd %zmm0, (%eax)
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
; X86-NEXT: LBB42_2: ## %L2
|
|
; X86-NEXT: vmovapd %zmm0, 8(%eax)
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
%addr1 = getelementptr double, double * %base, i64 0
|
|
%addr2 = getelementptr double, double * %base, i64 1
|
|
|
|
%vaddr1 = bitcast double* %addr1 to <8 x double>*
|
|
%vaddr2 = bitcast double* %addr2 to <8 x double>*
|
|
|
|
%val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
|
|
%val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
|
|
|
|
%sel1 = fcmp ogt <8 x double>%in, %val1
|
|
%val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
|
|
%sel2 = fcmp olt <8 x double> %in, %val3
|
|
%sel3 = and <8 x i1> %sel1, %sel2
|
|
|
|
%int_sel3 = bitcast <8 x i1> %sel3 to i8
|
|
%res = icmp eq i8 %int_sel3, zeroinitializer
|
|
br i1 %res, label %L2, label %L1
|
|
L1:
|
|
store <8 x double> %in, <8 x double>* %vaddr1
|
|
br label %End
|
|
L2:
|
|
store <8 x double> %in, <8 x double>* %vaddr2
|
|
br label %End
|
|
End:
|
|
ret void
|
|
}
|
|
|
|
; Branches on whether a <32 x i1> mask is all zero. The mask is the OR of
; two compares: %in > %val1 (loaded from %base) and %in < %val3, where %val3
; is %val2 (loaded one float, i.e. 4 bytes, past %base) in the lanes where
; the first compare holds and zero elsewhere. A zero mask takes the L2 path
; (store %in at the second address); otherwise L1 stores %in at %base.
; The KNL and AVX512DQ runs are expected to treat the mask as two 16-lane
; halves, OR each pair with korw, and combine the halves in general
; registers (shll $16 / orl) to set the flags. The SKX, AVX512BW and 32-bit
; runs are expected to assemble 32-bit masks with kunpckwd and test their OR
; with a single kortestd.
define void @ktest_2(<32 x float> %in, float * %base) {
|
|
;
|
|
; KNL-LABEL: ktest_2:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vmovups (%rdi), %zmm2
|
|
; KNL-NEXT: vmovups 64(%rdi), %zmm3
|
|
; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
|
|
; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
|
|
; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
|
|
; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
|
|
; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0
|
|
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3
|
|
; KNL-NEXT: korw %k3, %k2, %k2
|
|
; KNL-NEXT: kmovw %k2, %eax
|
|
; KNL-NEXT: korw %k0, %k1, %k0
|
|
; KNL-NEXT: kmovw %k0, %ecx
|
|
; KNL-NEXT: shll $16, %ecx
|
|
; KNL-NEXT: orl %eax, %ecx
|
|
; KNL-NEXT: je LBB43_2
|
|
; KNL-NEXT: ## %bb.1: ## %L1
|
|
; KNL-NEXT: vmovaps %zmm0, (%rdi)
|
|
; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
; KNL-NEXT: LBB43_2: ## %L2
|
|
; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
|
|
; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: ktest_2:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vmovups (%rdi), %zmm2
|
|
; SKX-NEXT: vmovups 64(%rdi), %zmm3
|
|
; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1
|
|
; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2
|
|
; SKX-NEXT: kunpckwd %k1, %k2, %k0
|
|
; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
|
|
; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
|
|
; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
|
|
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
|
; SKX-NEXT: kunpckwd %k1, %k2, %k1
|
|
; SKX-NEXT: kortestd %k1, %k0
|
|
; SKX-NEXT: je LBB43_2
|
|
; SKX-NEXT: ## %bb.1: ## %L1
|
|
; SKX-NEXT: vmovaps %zmm0, (%rdi)
|
|
; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
; SKX-NEXT: LBB43_2: ## %L2
|
|
; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
|
|
; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: ktest_2:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vmovups (%rdi), %zmm2
|
|
; AVX512BW-NEXT: vmovups 64(%rdi), %zmm3
|
|
; AVX512BW-NEXT: vcmpltps %zmm0, %zmm2, %k1
|
|
; AVX512BW-NEXT: vcmpltps %zmm1, %zmm3, %k2
|
|
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k0
|
|
; AVX512BW-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
|
|
; AVX512BW-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
|
|
; AVX512BW-NEXT: vcmpltps %zmm3, %zmm0, %k1
|
|
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
|
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
|
|
; AVX512BW-NEXT: kortestd %k1, %k0
|
|
; AVX512BW-NEXT: je LBB43_2
|
|
; AVX512BW-NEXT: ## %bb.1: ## %L1
|
|
; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
|
|
; AVX512BW-NEXT: vmovaps %zmm1, 64(%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
; AVX512BW-NEXT: LBB43_2: ## %L2
|
|
; AVX512BW-NEXT: vmovaps %zmm0, 4(%rdi)
|
|
; AVX512BW-NEXT: vmovaps %zmm1, 68(%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: ktest_2:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vmovups (%rdi), %zmm2
|
|
; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3
|
|
; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1
|
|
; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2
|
|
; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
|
|
; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
|
|
; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0
|
|
; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3
|
|
; AVX512DQ-NEXT: korw %k3, %k2, %k2
|
|
; AVX512DQ-NEXT: kmovw %k2, %eax
|
|
; AVX512DQ-NEXT: korw %k0, %k1, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %ecx
|
|
; AVX512DQ-NEXT: shll $16, %ecx
|
|
; AVX512DQ-NEXT: orl %eax, %ecx
|
|
; AVX512DQ-NEXT: je LBB43_2
|
|
; AVX512DQ-NEXT: ## %bb.1: ## %L1
|
|
; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
|
|
; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
; AVX512DQ-NEXT: LBB43_2: ## %L2
|
|
; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
|
|
; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: ktest_2:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: vmovups (%eax), %zmm2
|
|
; X86-NEXT: vmovups 64(%eax), %zmm3
|
|
; X86-NEXT: vcmpltps %zmm0, %zmm2, %k1
|
|
; X86-NEXT: vcmpltps %zmm1, %zmm3, %k2
|
|
; X86-NEXT: kunpckwd %k1, %k2, %k0
|
|
; X86-NEXT: vmovups 68(%eax), %zmm2 {%k2} {z}
|
|
; X86-NEXT: vmovups 4(%eax), %zmm3 {%k1} {z}
|
|
; X86-NEXT: vcmpltps %zmm3, %zmm0, %k1
|
|
; X86-NEXT: vcmpltps %zmm2, %zmm1, %k2
|
|
; X86-NEXT: kunpckwd %k1, %k2, %k1
|
|
; X86-NEXT: kortestd %k1, %k0
|
|
; X86-NEXT: je LBB43_2
|
|
; X86-NEXT: ## %bb.1: ## %L1
|
|
; X86-NEXT: vmovaps %zmm0, (%eax)
|
|
; X86-NEXT: vmovaps %zmm1, 64(%eax)
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
; X86-NEXT: LBB43_2: ## %L2
|
|
; X86-NEXT: vmovaps %zmm0, 4(%eax)
|
|
; X86-NEXT: vmovaps %zmm1, 68(%eax)
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
%addr1 = getelementptr float, float * %base, i64 0
|
|
%addr2 = getelementptr float, float * %base, i64 1
|
|
|
|
%vaddr1 = bitcast float* %addr1 to <32 x float>*
|
|
%vaddr2 = bitcast float* %addr2 to <32 x float>*
|
|
|
|
%val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
|
|
%val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
|
|
|
|
%sel1 = fcmp ogt <32 x float>%in, %val1
|
|
%val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
|
|
%sel2 = fcmp olt <32 x float> %in, %val3
|
|
%sel3 = or <32 x i1> %sel1, %sel2
|
|
|
|
%int_sel3 = bitcast <32 x i1> %sel3 to i32
|
|
%res = icmp eq i32 %int_sel3, zeroinitializer
|
|
br i1 %res, label %L2, label %L1
|
|
L1:
|
|
store <32 x float> %in, <32 x float>* %vaddr1
|
|
br label %End
|
|
L2:
|
|
store <32 x float> %in, <32 x float>* %vaddr2
|
|
br label %End
|
|
End:
|
|
ret void
|
|
}
|
|
|
|
; Loads an <8 x i1> from memory and sign-extends it to <8 x i64>. The SKX,
; AVX512DQ and 32-bit runs expect a direct kmovb load into a mask register
; followed by vpmovm2q. The KNL and AVX512BW runs expect the byte to be
; loaded through a general register (movzbl), moved into %k1, and expanded
; with a zero-masked vpternlogq $255.
define <8 x i64> @load_8i1(<8 x i1>* %a) {
|
|
; KNL-LABEL: load_8i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: movzbl (%rdi), %eax
|
|
; KNL-NEXT: kmovw %eax, %k1
|
|
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: load_8i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovb (%rdi), %k0
|
|
; SKX-NEXT: vpmovm2q %k0, %zmm0
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: load_8i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: movzbl (%rdi), %eax
|
|
; AVX512BW-NEXT: kmovd %eax, %k1
|
|
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: load_8i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovb (%rdi), %k0
|
|
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: load_8i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovb (%eax), %k0
|
|
; X86-NEXT: vpmovm2q %k0, %zmm0
|
|
; X86-NEXT: retl
|
|
%b = load <8 x i1>, <8 x i1>* %a
|
|
%c = sext <8 x i1> %b to <8 x i64>
|
|
ret <8 x i64> %c
|
|
}
|
|
|
|
; Loads a <16 x i1> from memory and sign-extends it to <16 x i32>. Every run
; expects a direct kmovw load into a mask register. The KNL and AVX512BW runs
; then expand it with a zero-masked vpternlogd $255, while the SKX, AVX512DQ
; and 32-bit runs use vpmovm2d.
define <16 x i32> @load_16i1(<16 x i1>* %a) {
|
|
; KNL-LABEL: load_16i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw (%rdi), %k1
|
|
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: load_16i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovw (%rdi), %k0
|
|
; SKX-NEXT: vpmovm2d %k0, %zmm0
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: load_16i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovw (%rdi), %k1
|
|
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: load_16i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw (%rdi), %k0
|
|
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: load_16i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovw (%eax), %k0
|
|
; X86-NEXT: vpmovm2d %k0, %zmm0
|
|
; X86-NEXT: retl
|
|
%b = load <16 x i1>, <16 x i1>* %a
|
|
%c = sext <16 x i1> %b to <16 x i32>
|
|
ret <16 x i32> %c
|
|
}
|
|
|
|
; Loads a <2 x i1> from memory and sign-extends it to <2 x i16>. In every
; run the check lines expand the mask to 64-bit lanes (vpternlogq or
; vpmovm2q) and return it in %xmm0. The SKX and 32-bit runs operate directly
; on %xmm0. The KNL, AVX512BW and AVX512DQ runs operate on the full %zmm0
; and therefore also expect a vzeroupper before returning.
define <2 x i16> @load_2i1(<2 x i1>* %a) {
|
|
; KNL-LABEL: load_2i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: movzbl (%rdi), %eax
|
|
; KNL-NEXT: kmovw %eax, %k1
|
|
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: load_2i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovb (%rdi), %k0
|
|
; SKX-NEXT: vpmovm2q %k0, %xmm0
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: load_2i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: movzbl (%rdi), %eax
|
|
; AVX512BW-NEXT: kmovd %eax, %k1
|
|
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: load_2i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovb (%rdi), %k0
|
|
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
|
|
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: load_2i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovb (%eax), %k0
|
|
; X86-NEXT: vpmovm2q %k0, %xmm0
|
|
; X86-NEXT: retl
|
|
%b = load <2 x i1>, <2 x i1>* %a
|
|
%c = sext <2 x i1> %b to <2 x i16>
|
|
ret <2 x i16> %c
|
|
}
|
|
|
|
; Loads a <4 x i1> from memory and sign-extends it to <4 x i16>. In every
; run the check lines expand the mask to 32-bit lanes (vpternlogd or
; vpmovm2d) and return it in %xmm0. The SKX and 32-bit runs operate directly
; on %xmm0. The KNL, AVX512BW and AVX512DQ runs operate on the full %zmm0
; and therefore also expect a vzeroupper before returning.
define <4 x i16> @load_4i1(<4 x i1>* %a) {
|
|
; KNL-LABEL: load_4i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: movzbl (%rdi), %eax
|
|
; KNL-NEXT: kmovw %eax, %k1
|
|
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: load_4i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovb (%rdi), %k0
|
|
; SKX-NEXT: vpmovm2d %k0, %xmm0
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: load_4i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: movzbl (%rdi), %eax
|
|
; AVX512BW-NEXT: kmovd %eax, %k1
|
|
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: load_4i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovb (%rdi), %k0
|
|
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
|
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: load_4i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovb (%eax), %k0
|
|
; X86-NEXT: vpmovm2d %k0, %xmm0
|
|
; X86-NEXT: retl
|
|
%b = load <4 x i1>, <4 x i1>* %a
|
|
%c = sext <4 x i1> %b to <4 x i16>
|
|
ret <4 x i16> %c
|
|
}
|
|
|
|
; Loads a <32 x i1> from memory and sign-extends it to <32 x i16>. The SKX,
; AVX512BW and 32-bit runs expect one kmovd load followed by vpmovm2w. The
; KNL and AVX512DQ runs expect the mask to be read as two 16-bit halves
; (kmovw at byte offsets 0 and 2), each expanded to 32-bit lanes and then
; narrowed with vpmovdw, producing the result in %ymm0 and %ymm1.
define <32 x i16> @load_32i1(<32 x i1>* %a) {
|
|
; KNL-LABEL: load_32i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw (%rdi), %k1
|
|
; KNL-NEXT: kmovw 2(%rdi), %k2
|
|
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
|
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
|
|
; KNL-NEXT: vpmovdw %zmm1, %ymm1
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: load_32i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovd (%rdi), %k0
|
|
; SKX-NEXT: vpmovm2w %k0, %zmm0
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: load_32i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd (%rdi), %k0
|
|
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: load_32i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw (%rdi), %k0
|
|
; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
|
|
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
|
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1
|
|
; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm1
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: load_32i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovd (%eax), %k0
|
|
; X86-NEXT: vpmovm2w %k0, %zmm0
|
|
; X86-NEXT: retl
|
|
%b = load <32 x i1>, <32 x i1>* %a
|
|
%c = sext <32 x i1> %b to <32 x i16>
|
|
ret <32 x i16> %c
|
|
}
|
|
|
|
; Loads a <64 x i1> from memory and sign-extends it to <64 x i8>. The SKX,
; AVX512BW and 32-bit runs expect one kmovq load followed by vpmovm2b. The
; KNL and AVX512DQ runs expect the mask to be read as four 16-bit pieces
; (kmovw at byte offsets 0, 2, 4 and 6), each expanded to 32-bit lanes,
; narrowed to bytes with vpmovdb, and paired up with vinserti128 so that the
; result ends up in %ymm0 and %ymm1.
define <64 x i8> @load_64i1(<64 x i1>* %a) {
|
|
; KNL-LABEL: load_64i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw (%rdi), %k1
|
|
; KNL-NEXT: kmovw 2(%rdi), %k2
|
|
; KNL-NEXT: kmovw 4(%rdi), %k3
|
|
; KNL-NEXT: kmovw 6(%rdi), %k4
|
|
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
|
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
|
|
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
|
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
|
|
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
|
; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z}
|
|
; KNL-NEXT: vpmovdb %zmm2, %xmm2
|
|
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: load_64i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovq (%rdi), %k0
|
|
; SKX-NEXT: vpmovm2b %k0, %zmm0
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: load_64i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovq (%rdi), %k0
|
|
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: load_64i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw (%rdi), %k0
|
|
; AVX512DQ-NEXT: kmovw 2(%rdi), %k1
|
|
; AVX512DQ-NEXT: kmovw 4(%rdi), %k2
|
|
; AVX512DQ-NEXT: kmovw 6(%rdi), %k3
|
|
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
|
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm1
|
|
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
|
|
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
|
|
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
|
|
; AVX512DQ-NEXT: vpmovm2d %k3, %zmm2
|
|
; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
|
|
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: load_64i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovq (%eax), %k0
|
|
; X86-NEXT: vpmovm2b %k0, %zmm0
|
|
; X86-NEXT: retl
|
|
%b = load <64 x i1>, <64 x i1>* %a
|
|
%c = sext <64 x i1> %b to <64 x i8>
|
|
ret <64 x i8> %c
|
|
}
|
|
|
|
; Stores an <8 x i1> argument through %a. The SKX, AVX512DQ and 32-bit runs
; expect the mask to be written straight from %k0 with kmovb. The KNL and
; AVX512BW runs expect it to be copied to a general register first (kmovw or
; kmovd) and written with a byte store.
define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
|
|
; KNL-LABEL: store_8i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: store_8i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
; SKX-NEXT: vpmovw2m %xmm0, %k0
|
|
; SKX-NEXT: kmovb %k0, (%rdi)
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: store_8i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: movb %al, (%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: store_8i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
|
|
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_8i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
; X86-NEXT: vpmovw2m %xmm0, %k0
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovb %k0, (%eax)
|
|
; X86-NEXT: retl
|
|
store <8 x i1> %v, <8 x i1>* %a
|
|
ret void
|
|
}
|
|
|
|
; Truncates an <8 x i16> argument to <8 x i1> and stores it through %a. The
; SKX, AVX512DQ and 32-bit runs expect the mask to be written straight from
; %k0 with kmovb. The KNL and AVX512BW runs expect it to be copied to a
; general register first (kmovw or kmovd) and written with a byte store.
define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
|
|
; KNL-LABEL: store_8i1_1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: store_8i1_1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
; SKX-NEXT: vpmovw2m %xmm0, %k0
|
|
; SKX-NEXT: kmovb %k0, (%rdi)
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: store_8i1_1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: movb %al, (%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: store_8i1_1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0
|
|
; AVX512DQ-NEXT: kmovb %k0, (%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_8i1_1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
; X86-NEXT: vpmovw2m %xmm0, %k0
|
|
; X86-NEXT: kmovb %k0, (%eax)
|
|
; X86-NEXT: retl
|
|
%v1 = trunc <8 x i16> %v to <8 x i1>
|
|
store <8 x i1> %v1, <8 x i1>* %a
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i1> argument through %a. Every run expects the mask to be
; built in %k0 and written to memory with a single kmovw. The runs differ
; only in how the vector argument is converted into a mask register.
define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
|
|
; KNL-LABEL: store_16i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; KNL-NEXT: kmovw %k0, (%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: store_16i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
|
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
|
; SKX-NEXT: kmovw %k0, (%rdi)
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: store_16i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovw %k0, (%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: store_16i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_16i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: vpsllw $7, %xmm0, %xmm0
|
|
; X86-NEXT: vpmovb2m %xmm0, %k0
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovw %k0, (%eax)
|
|
; X86-NEXT: retl
|
|
store <16 x i1> %v, <16 x i1>* %a
|
|
ret void
|
|
}
|
|
|
|
; Stores a <32 x i1> argument through %a. The SKX, AVX512BW and 32-bit runs
; expect one 32-bit mask built with vpmovb2m and written with kmovd. The KNL
; and AVX512DQ runs expect the argument to be split into two 16-lane halves,
; each turned into its own mask and written with kmovw at byte offsets 2
; and 0.
define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
|
|
; KNL-LABEL: store_32i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
|
; KNL-NEXT: kmovw %k1, 2(%rdi)
|
|
; KNL-NEXT: kmovw %k0, (%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: store_32i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
|
; SKX-NEXT: kmovd %k0, (%rdi)
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: store_32i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, (%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: store_32i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0
|
|
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
|
|
; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
|
|
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_32i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
; X86-NEXT: vpmovb2m %ymm0, %k0
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovd %k0, (%eax)
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
store <32 x i1> %v, <32 x i1>* %a
|
|
ret void
|
|
}
|
|
|
|
; Truncates a <32 x i16> argument to <32 x i1> and stores it through %a. The
; SKX, AVX512BW and 32-bit runs expect one 32-bit mask built with vpmovw2m
; and written with kmovd. The KNL and AVX512DQ runs expect the two 256-bit
; halves of the argument to be converted separately and written with kmovw
; at byte offsets 2 and 0.
define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
|
|
; KNL-LABEL: store_32i1_1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
|
; KNL-NEXT: kmovw %k1, 2(%rdi)
|
|
; KNL-NEXT: kmovw %k0, (%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: store_32i1_1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
|
|
; SKX-NEXT: vpmovw2m %zmm0, %k0
|
|
; SKX-NEXT: kmovd %k0, (%rdi)
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: store_32i1_1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, (%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: store_32i1_1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
|
; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
|
|
; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
|
|
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_32i1_1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: vpsllw $15, %zmm0, %zmm0
|
|
; X86-NEXT: vpmovw2m %zmm0, %k0
|
|
; X86-NEXT: kmovd %k0, (%eax)
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
%v1 = trunc <32 x i16> %v to <32 x i1>
|
|
store <32 x i1> %v1, <32 x i1>* %a
|
|
ret void
|
|
}
|
|
|
|
|
|
; Store a <64 x i1> argument through %a.
; The runs with avx512bw are expected to produce the mask with vpmovb2m and
; store it with one kmovq; the runs without it are expected to assemble four
; 16-bit pieces and store each of them with kmovw.
define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|
;
|
|
; KNL-LABEL: store_64i1:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
|
; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
|
|
; KNL-NEXT: vpmovsxbd %xmm3, %zmm0
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3
|
|
; KNL-NEXT: kmovw %k3, 6(%rdi)
|
|
; KNL-NEXT: kmovw %k2, 4(%rdi)
|
|
; KNL-NEXT: kmovw %k1, 2(%rdi)
|
|
; KNL-NEXT: kmovw %k0, (%rdi)
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: store_64i1:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k0
|
|
; SKX-NEXT: kmovq %k0, (%rdi)
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: store_64i1:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovq %k0, (%rdi)
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: store_64i1:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
|
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
|
|
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2
|
|
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0
|
|
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3
|
|
; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
|
|
; AVX512DQ-NEXT: kmovw %k2, 4(%rdi)
|
|
; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
|
|
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_64i1:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: vpsllw $7, %zmm0, %zmm0
|
|
; X86-NEXT: vpmovb2m %zmm0, %k0
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: kmovq %k0, (%eax)
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
store <64 x i1> %v, <64 x i1>* %a
|
|
ret void
|
|
}
|
|
|
|
; Compare %a against zero, keep the low 8 lanes of the <16 x i1> result, bitcast
; them to i8, zero-extend to i32 and add the value to itself.
; The runs with avx512dq are expected to move the mask with kmovb and need no
; separate movzbl; the other runs are expected to emit an explicit movzbl.
define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
|
|
; KNL-LABEL: test_bitcast_v8i1_zext:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: addl %eax, %eax
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_bitcast_v8i1_zext:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; SKX-NEXT: kmovb %k0, %eax
|
|
; SKX-NEXT: addl %eax, %eax
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_bitcast_v8i1_zext:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: movzbl %al, %eax
|
|
; AVX512BW-NEXT: addl %eax, %eax
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; AVX512DQ-NEXT: kmovb %k0, %eax
|
|
; AVX512DQ-NEXT: addl %eax, %eax
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_bitcast_v8i1_zext:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; X86-NEXT: kmovb %k0, %eax
|
|
; X86-NEXT: addl %eax, %eax
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
%v1 = icmp eq <16 x i32> %a, zeroinitializer
|
|
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%mask1 = bitcast <8 x i1> %mask to i8
|
|
%val = zext i8 %mask1 to i32
|
|
%val1 = add i32 %val, %val
|
|
ret i32 %val1
|
|
}
|
|
|
|
; Compare %a against zero, bitcast the <16 x i1> result to i16, zero-extend it
; to i32 and add the value to itself.
; Every run is expected to feed the kmovw result straight into addl, with no
; extra zero-extension instruction in between.
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
|
|
; CHECK-LABEL: test_bitcast_v16i1_zext:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; CHECK-NEXT: kmovw %k0, %eax
|
|
; CHECK-NEXT: addl %eax, %eax
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_bitcast_v16i1_zext:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; X86-NEXT: kmovw %k0, %eax
|
|
; X86-NEXT: addl %eax, %eax
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
%v1 = icmp eq <16 x i32> %a, zeroinitializer
|
|
%mask1 = bitcast <16 x i1> %v1 to i16
|
|
%val = zext i16 %mask1 to i32
|
|
%val1 = add i32 %val, %val
|
|
ret i32 %val1
|
|
}
|
|
|
|
; Bitcast both i16 arguments to <16 x i1>, add the two vectors and return the
; result as i16.
; All runs are expected to emit the <16 x i1> add as a kxorw on mask registers.
define i16 @test_v16i1_add(i16 %x, i16 %y) {
|
|
; KNL-LABEL: test_v16i1_add:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw %edi, %k0
|
|
; KNL-NEXT: kmovw %esi, %k1
|
|
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_v16i1_add:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovd %edi, %k0
|
|
; SKX-NEXT: kmovd %esi, %k1
|
|
; SKX-NEXT: kxorw %k1, %k0, %k0
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_v16i1_add:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k0
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_v16i1_add:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw %edi, %k0
|
|
; AVX512DQ-NEXT: kmovw %esi, %k1
|
|
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %eax
|
|
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_v16i1_add:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
|
|
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
|
; X86-NEXT: kxorw %k1, %k0, %k0
|
|
; X86-NEXT: kmovd %k0, %eax
|
|
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; X86-NEXT: retl
|
|
%m0 = bitcast i16 %x to <16 x i1>
|
|
%m1 = bitcast i16 %y to <16 x i1>
|
|
%m2 = add <16 x i1> %m0, %m1
|
|
%ret = bitcast <16 x i1> %m2 to i16
|
|
ret i16 %ret
|
|
}
|
|
|
|
; Bitcast both i16 arguments to <16 x i1>, subtract the second vector from the
; first and return the result as i16.
; All runs are expected to emit the <16 x i1> sub as a kxorw on mask registers.
define i16 @test_v16i1_sub(i16 %x, i16 %y) {
|
|
; KNL-LABEL: test_v16i1_sub:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw %edi, %k0
|
|
; KNL-NEXT: kmovw %esi, %k1
|
|
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_v16i1_sub:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovd %edi, %k0
|
|
; SKX-NEXT: kmovd %esi, %k1
|
|
; SKX-NEXT: kxorw %k1, %k0, %k0
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_v16i1_sub:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k0
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_v16i1_sub:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw %edi, %k0
|
|
; AVX512DQ-NEXT: kmovw %esi, %k1
|
|
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %eax
|
|
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_v16i1_sub:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
|
|
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
|
; X86-NEXT: kxorw %k1, %k0, %k0
|
|
; X86-NEXT: kmovd %k0, %eax
|
|
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; X86-NEXT: retl
|
|
%m0 = bitcast i16 %x to <16 x i1>
|
|
%m1 = bitcast i16 %y to <16 x i1>
|
|
%m2 = sub <16 x i1> %m0, %m1
|
|
%ret = bitcast <16 x i1> %m2 to i16
|
|
ret i16 %ret
|
|
}
|
|
|
|
; Bitcast both i16 arguments to <16 x i1>, multiply the two vectors and return
; the result as i16.
; All runs are expected to emit the <16 x i1> mul as a kandw on mask registers.
define i16 @test_v16i1_mul(i16 %x, i16 %y) {
|
|
; KNL-LABEL: test_v16i1_mul:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw %edi, %k0
|
|
; KNL-NEXT: kmovw %esi, %k1
|
|
; KNL-NEXT: kandw %k1, %k0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_v16i1_mul:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovd %edi, %k0
|
|
; SKX-NEXT: kmovd %esi, %k1
|
|
; SKX-NEXT: kandw %k1, %k0, %k0
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_v16i1_mul:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k0
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_v16i1_mul:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw %edi, %k0
|
|
; AVX512DQ-NEXT: kmovw %esi, %k1
|
|
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %eax
|
|
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_v16i1_mul:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
|
|
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
|
; X86-NEXT: kandw %k1, %k0, %k0
|
|
; X86-NEXT: kmovd %k0, %eax
|
|
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
|
|
; X86-NEXT: retl
|
|
%m0 = bitcast i16 %x to <16 x i1>
|
|
%m1 = bitcast i16 %y to <16 x i1>
|
|
%m2 = mul <16 x i1> %m0, %m1
|
|
%ret = bitcast <16 x i1> %m2 to i16
|
|
ret i16 %ret
|
|
}
|
|
|
|
; Bitcast both i8 arguments to <8 x i1>, add the two vectors and return the
; result as i8.
; The runs with avx512dq are expected to emit the add as kxorb; the runs
; without it are expected to fall back to the 16-bit kxorw.
define i8 @test_v8i1_add(i8 %x, i8 %y) {
|
|
; KNL-LABEL: test_v8i1_add:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw %edi, %k0
|
|
; KNL-NEXT: kmovw %esi, %k1
|
|
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: ## kill: def $al killed $al killed $eax
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_v8i1_add:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovd %edi, %k0
|
|
; SKX-NEXT: kmovd %esi, %k1
|
|
; SKX-NEXT: kxorb %k1, %k0, %k0
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
; SKX-NEXT: ## kill: def $al killed $al killed $eax
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_v8i1_add:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k0
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_v8i1_add:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw %edi, %k0
|
|
; AVX512DQ-NEXT: kmovw %esi, %k1
|
|
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %eax
|
|
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_v8i1_add:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
|
|
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
|
|
; X86-NEXT: kxorb %k1, %k0, %k0
|
|
; X86-NEXT: kmovd %k0, %eax
|
|
; X86-NEXT: ## kill: def $al killed $al killed $eax
|
|
; X86-NEXT: retl
|
|
%m0 = bitcast i8 %x to <8 x i1>
|
|
%m1 = bitcast i8 %y to <8 x i1>
|
|
%m2 = add <8 x i1> %m0, %m1
|
|
%ret = bitcast <8 x i1> %m2 to i8
|
|
ret i8 %ret
|
|
}
|
|
|
|
; Bitcast both i8 arguments to <8 x i1>, subtract the second vector from the
; first and return the result as i8.
; The runs with avx512dq are expected to emit the sub as kxorb; the runs
; without it are expected to fall back to the 16-bit kxorw.
define i8 @test_v8i1_sub(i8 %x, i8 %y) {
|
|
; KNL-LABEL: test_v8i1_sub:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw %edi, %k0
|
|
; KNL-NEXT: kmovw %esi, %k1
|
|
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: ## kill: def $al killed $al killed $eax
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_v8i1_sub:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovd %edi, %k0
|
|
; SKX-NEXT: kmovd %esi, %k1
|
|
; SKX-NEXT: kxorb %k1, %k0, %k0
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
; SKX-NEXT: ## kill: def $al killed $al killed $eax
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_v8i1_sub:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k0
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_v8i1_sub:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw %edi, %k0
|
|
; AVX512DQ-NEXT: kmovw %esi, %k1
|
|
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %eax
|
|
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_v8i1_sub:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
|
|
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
|
|
; X86-NEXT: kxorb %k1, %k0, %k0
|
|
; X86-NEXT: kmovd %k0, %eax
|
|
; X86-NEXT: ## kill: def $al killed $al killed $eax
|
|
; X86-NEXT: retl
|
|
%m0 = bitcast i8 %x to <8 x i1>
|
|
%m1 = bitcast i8 %y to <8 x i1>
|
|
%m2 = sub <8 x i1> %m0, %m1
|
|
%ret = bitcast <8 x i1> %m2 to i8
|
|
ret i8 %ret
|
|
}
|
|
|
|
; Bitcast both i8 arguments to <8 x i1>, multiply the two vectors and return
; the result as i8.
; The runs with avx512dq are expected to emit the mul as kandb; the runs
; without it are expected to fall back to the 16-bit kandw.
define i8 @test_v8i1_mul(i8 %x, i8 %y) {
|
|
; KNL-LABEL: test_v8i1_mul:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: kmovw %edi, %k0
|
|
; KNL-NEXT: kmovw %esi, %k1
|
|
; KNL-NEXT: kandw %k1, %k0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: ## kill: def $al killed $al killed $eax
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test_v8i1_mul:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: kmovd %edi, %k0
|
|
; SKX-NEXT: kmovd %esi, %k1
|
|
; SKX-NEXT: kandb %k1, %k0, %k0
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
; SKX-NEXT: ## kill: def $al killed $al killed $eax
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: test_v8i1_mul:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k0
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: kandw %k1, %k0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: test_v8i1_mul:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: kmovw %edi, %k0
|
|
; AVX512DQ-NEXT: kmovw %esi, %k1
|
|
; AVX512DQ-NEXT: kandb %k1, %k0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %eax
|
|
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: test_v8i1_mul:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
|
|
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
|
|
; X86-NEXT: kandb %k1, %k0, %k0
|
|
; X86-NEXT: kmovd %k0, %eax
|
|
; X86-NEXT: ## kill: def $al killed $al killed $eax
|
|
; X86-NEXT: retl
|
|
%m0 = bitcast i8 %x to <8 x i1>
|
|
%m1 = bitcast i8 %y to <8 x i1>
|
|
%m2 = mul <8 x i1> %m0, %m1
|
|
%ret = bitcast <8 x i1> %m2 to i8
|
|
ret i8 %ret
|
|
}
|
|
|
|
; Make sure we don't emit a ktest for signed comparisons.
|
|
; AND the "equals zero" masks of %x and %y, bitcast the <16 x i1> result to i16
; and branch on a signed "greater than 0" test of that value; @foo is called
; only when the test is false.
; Every run is expected to move the mask into a general-purpose register and
; branch with testw + jle rather than testing the mask register directly.
define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
|
; KNL-LABEL: ktest_signed:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: pushq %rax
|
|
; KNL-NEXT: .cfi_def_cfa_offset 16
|
|
; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
|
|
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; KNL-NEXT: kmovw %k0, %eax
|
|
; KNL-NEXT: testw %ax, %ax
|
|
; KNL-NEXT: jle LBB64_1
|
|
; KNL-NEXT: ## %bb.2: ## %bb.2
|
|
; KNL-NEXT: popq %rax
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: retq
|
|
; KNL-NEXT: LBB64_1: ## %bb.1
|
|
; KNL-NEXT: vzeroupper
|
|
; KNL-NEXT: callq _foo
|
|
; KNL-NEXT: popq %rax
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: ktest_signed:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: pushq %rax
|
|
; SKX-NEXT: .cfi_def_cfa_offset 16
|
|
; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
|
|
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; SKX-NEXT: kmovd %k0, %eax
|
|
; SKX-NEXT: testw %ax, %ax
|
|
; SKX-NEXT: jle LBB64_1
|
|
; SKX-NEXT: ## %bb.2: ## %bb.2
|
|
; SKX-NEXT: popq %rax
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: retq
|
|
; SKX-NEXT: LBB64_1: ## %bb.1
|
|
; SKX-NEXT: vzeroupper
|
|
; SKX-NEXT: callq _foo
|
|
; SKX-NEXT: popq %rax
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: ktest_signed:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: pushq %rax
|
|
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
|
|
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; AVX512BW-NEXT: kmovd %k0, %eax
|
|
; AVX512BW-NEXT: testw %ax, %ax
|
|
; AVX512BW-NEXT: jle LBB64_1
|
|
; AVX512BW-NEXT: ## %bb.2: ## %bb.2
|
|
; AVX512BW-NEXT: popq %rax
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: retq
|
|
; AVX512BW-NEXT: LBB64_1: ## %bb.1
|
|
; AVX512BW-NEXT: vzeroupper
|
|
; AVX512BW-NEXT: callq _foo
|
|
; AVX512BW-NEXT: popq %rax
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: ktest_signed:
|
|
; AVX512DQ: ## %bb.0:
|
|
; AVX512DQ-NEXT: pushq %rax
|
|
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
|
|
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
|
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; AVX512DQ-NEXT: kmovw %k0, %eax
|
|
; AVX512DQ-NEXT: testw %ax, %ax
|
|
; AVX512DQ-NEXT: jle LBB64_1
|
|
; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
|
|
; AVX512DQ-NEXT: popq %rax
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: retq
|
|
; AVX512DQ-NEXT: LBB64_1: ## %bb.1
|
|
; AVX512DQ-NEXT: vzeroupper
|
|
; AVX512DQ-NEXT: callq _foo
|
|
; AVX512DQ-NEXT: popq %rax
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: ktest_signed:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: subl $12, %esp
|
|
; X86-NEXT: .cfi_def_cfa_offset 16
|
|
; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
|
|
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; X86-NEXT: kmovd %k0, %eax
|
|
; X86-NEXT: testw %ax, %ax
|
|
; X86-NEXT: jle LBB64_1
|
|
; X86-NEXT: ## %bb.2: ## %bb.2
|
|
; X86-NEXT: addl $12, %esp
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
; X86-NEXT: LBB64_1: ## %bb.1
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: calll _foo
|
|
; X86-NEXT: addl $12, %esp
|
|
; X86-NEXT: retl
|
|
%a = icmp eq <16 x i32> %x, zeroinitializer
|
|
%b = icmp eq <16 x i32> %y, zeroinitializer
|
|
%c = and <16 x i1> %a, %b
|
|
%d = bitcast <16 x i1> %c to i16
|
|
%e = icmp sgt i16 %d, 0
|
|
br i1 %e, label %bb.2, label %bb.1
|
|
bb.1:
|
|
call void @foo()
|
|
br label %bb.2
|
|
bb.2:
|
|
ret void
|
|
}
|
|
; External function called on the bb.1 path of ktest_signed and ktest_allones;
; only its declaration is provided in this file.
declare void @foo()
|
|
|
|
; Make sure we can use the C flag from kortest to check for all ones.
|
|
; AND the "equals zero" masks of %x and %y, bitcast the <16 x i1> result to i16
; and branch on whether it equals -1 (all 16 mask bits set); @foo is called
; only when it does not.
; Every run is expected to test the mask register directly with kortestw and
; branch with jb, without first moving the mask to a general-purpose register.
define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
|
|
; CHECK-LABEL: ktest_allones:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: pushq %rax
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
|
|
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; CHECK-NEXT: kortestw %k0, %k0
|
|
; CHECK-NEXT: jb LBB65_2
|
|
; CHECK-NEXT: ## %bb.1: ## %bb.1
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: callq _foo
|
|
; CHECK-NEXT: LBB65_2: ## %bb.2
|
|
; CHECK-NEXT: popq %rax
|
|
; CHECK-NEXT: vzeroupper
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; X86-LABEL: ktest_allones:
|
|
; X86: ## %bb.0:
|
|
; X86-NEXT: subl $12, %esp
|
|
; X86-NEXT: .cfi_def_cfa_offset 16
|
|
; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
|
|
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
|
; X86-NEXT: kortestw %k0, %k0
|
|
; X86-NEXT: jb LBB65_2
|
|
; X86-NEXT: ## %bb.1: ## %bb.1
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: calll _foo
|
|
; X86-NEXT: LBB65_2: ## %bb.2
|
|
; X86-NEXT: addl $12, %esp
|
|
; X86-NEXT: vzeroupper
|
|
; X86-NEXT: retl
|
|
%a = icmp eq <16 x i32> %x, zeroinitializer
|
|
%b = icmp eq <16 x i32> %y, zeroinitializer
|
|
%c = and <16 x i1> %a, %b
|
|
%d = bitcast <16 x i1> %c to i16
|
|
%e = icmp eq i16 %d, -1
|
|
br i1 %e, label %bb.2, label %bb.1
|
|
bb.1:
|
|
call void @foo()
|
|
br label %bb.2
|
|
bb.2:
|
|
ret void
|
|
}
|
|
|
|
; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask.
|
|
; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
|
|
; Compare %a and %b as <4 x i32>, widen the <4 x i1> result to <16 x i1> with
; two shufflevectors whose extra lanes are false, and use it to select between
; %f (mask bit set) and %e (mask bit clear) as <16 x i32>.
; The runs with avx512vl are expected to compare in xmm registers and use the
; result directly as the blend mask; the other runs are expected to compare in
; zmm registers and clear the upper 12 mask bits with a kshiftlw/kshiftrw pair.
define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
|
|
; KNL-LABEL: mask_widening:
|
|
; KNL: ## %bb.0: ## %entry
|
|
; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
|
|
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
|
|
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; KNL-NEXT: kshiftlw $12, %k0, %k0
|
|
; KNL-NEXT: kshiftrw $12, %k0, %k1
|
|
; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: mask_widening:
|
|
; SKX: ## %bb.0: ## %entry
|
|
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
|
|
; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
|
|
; SKX-NEXT: retq
|
|
;
|
|
; AVX512BW-LABEL: mask_widening:
|
|
; AVX512BW: ## %bb.0: ## %entry
|
|
; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
|
|
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
|
|
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
|
|
; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
|
|
; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512DQ-LABEL: mask_widening:
|
|
; AVX512DQ: ## %bb.0: ## %entry
|
|
; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
|
|
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
|
|
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
|
|
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
|
|
; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
|
|
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
|
|
; AVX512DQ-NEXT: retq
|
|
;
|
|
; X86-LABEL: mask_widening:
|
|
; X86: ## %bb.0: ## %entry
|
|
; X86-NEXT: pushl %ebp
|
|
; X86-NEXT: .cfi_def_cfa_offset 8
|
|
; X86-NEXT: .cfi_offset %ebp, -8
|
|
; X86-NEXT: movl %esp, %ebp
|
|
; X86-NEXT: .cfi_def_cfa_register %ebp
|
|
; X86-NEXT: andl $-64, %esp
|
|
; X86-NEXT: subl $64, %esp
|
|
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
|
|
; X86-NEXT: vmovdqa64 8(%ebp), %zmm0
|
|
; X86-NEXT: vmovdqa32 72(%ebp), %zmm0 {%k1}
|
|
; X86-NEXT: movl %ebp, %esp
|
|
; X86-NEXT: popl %ebp
|
|
; X86-NEXT: retl
|
|
entry:
|
|
%0 = bitcast <2 x i64> %a to <4 x i32>
|
|
%1 = bitcast <2 x i64> %b to <4 x i32>
|
|
%2 = icmp eq <4 x i32> %0, %1
|
|
; Lanes 0-3 are the compare result; lanes 4-7 come from the zeroinitializer operand.
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%4 = bitcast <8 x i64> %f to <16 x i32>
|
|
%5 = bitcast <8 x i64> %e to <16 x i32>
|
|
; Lanes 8-15 all use index 8, i.e. element 0 of the second operand (i1 false).
%6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
|
%7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
|
|
%8 = bitcast <16 x i32> %7 to <8 x i64>
|
|
ret <8 x i64> %8
|
|
}
|
|
|
|
; Store a constant <64 x i1> vector through %R.
; The x86_64 runs are expected to materialize the whole mask as one 64-bit
; immediate and store it with a scalar movq; the i686 run is expected to use
; two 32-bit immediate stores. No mask register is expected in either case.
define void @store_v64i1_constant(<64 x i1>* %R) {
|
|
; CHECK-LABEL: store_v64i1_constant:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
|
|
; CHECK-NEXT: movq %rax, (%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_v64i1_constant:
|
|
; X86: ## %bb.0: ## %entry
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
|
|
; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD
|
|
; X86-NEXT: retl
|
|
entry:
|
|
store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
|
|
ret void
|
|
}
|
|
|
|
; Store the constant <2 x i1> vector <1, 0> through %R.
; Every run is expected to emit a single byte store of the immediate 1.
define void @store_v2i1_constant(<2 x i1>* %R) {
|
|
; CHECK-LABEL: store_v2i1_constant:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; CHECK-NEXT: movb $1, (%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_v2i1_constant:
|
|
; X86: ## %bb.0: ## %entry
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: movb $1, (%eax)
|
|
; X86-NEXT: retl
|
|
entry:
|
|
store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
|
|
ret void
|
|
}
|
|
|
|
; Store the constant <4 x i1> vector <1, 0, 1, 0> through %R.
; Every run is expected to emit a single byte store of the immediate 5.
define void @store_v4i1_constant(<4 x i1>* %R) {
|
|
; CHECK-LABEL: store_v4i1_constant:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; CHECK-NEXT: movb $5, (%rdi)
|
|
; CHECK-NEXT: retq
|
|
;
|
|
; X86-LABEL: store_v4i1_constant:
|
|
; X86: ## %bb.0: ## %entry
|
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; X86-NEXT: movb $5, (%eax)
|
|
; X86-NEXT: retl
|
|
entry:
|
|
store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
|
|
ret void
|
|
}
|