; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Bitwise NOT of a 16-bit mask: the i16 argument is reinterpreted as
; <16 x i1>, xor'ed with an all-ones vector and converted back to i16.
; Both run lines expect a single knotw on a k-register.
define i16 @mask16(i16 %x) {
; CHECK-LABEL: mask16:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}
; Bitwise NOT of an 8-bit mask (i8 <-> <8 x i1>).
; The KNL assertions expect the word-sized kmovw/knotw forms, while the
; SKX assertions expect the byte-sized kmovb/knotb forms.
define i8 @mask8(i8 %x) {
; KNL-LABEL: mask8:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: mask8:
; SKX: ## BB#0:
; SKX-NEXT: kmovb %edi, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: retq
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}
; In-memory variant of the 16-bit mask NOT: loads an i16 from %ptr, inverts
; it as a <16 x i1> vector and stores the result back to the same address.
; The assertions expect the load and store to go directly through kmovw.
define void @mask16_mem(i16* %ptr) {
; CHECK-LABEL: mask16_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw (%rdi), %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, (%rdi)
; CHECK-NEXT: retq
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}
; In-memory variant of the 8-bit mask NOT: loads an i8 from %ptr, inverts
; it as a <8 x i1> vector and stores the result back to the same address.
; KNL is expected to use word-sized mask moves, SKX byte-sized ones.
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_mem:
; SKX: ## BB#0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}
; Combines two 16-bit masks as (a & b) | (a ^ b) on <16 x i1> vectors.
; The assertions expect each logical operation to map onto its k-register
; instruction (kandw, kxorw, korw).
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kandw %k1, %k0, %k2
; CHECK-NEXT: kxorw %k1, %k0, %k0
; CHECK-NEXT: korw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}
; Extracts the upper eight elements (indices 8..15) of a 16-element mask
; with a shufflevector and returns them as an i8.
; Both targets are expected to implement this as a right shift of the
; k-register by 8 (kshiftrw $8).
define i8 @shuf_test1(i16 %v) nounwind {
; KNL-LABEL: shuf_test1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: shuf_test1:
; SKX: ## BB#0:
; SKX-NEXT: kmovw %edi, %k0
; SKX-NEXT: kshiftrw $8, %k0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: retq
  %v1 = bitcast i16 %v to <16 x i1>
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}
; zext_test1/2/3: extract element 5 of an unsigned-greater-than compare of
; two <16 x i32> vectors and zero-extend that single i1 to i32, i16 and i8
; respectively.
;
; For the i32 case the assertions expect the bit to be isolated inside the
; k-register (kshiftlw $10 followed by kshiftrw $15) before being moved to
; a general-purpose register and masked with 1.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: zext_test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT: kshiftlw $10, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; Same as zext_test1 but zero-extends to i16. No assertions are attached to
; this function here.
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; Same as zext_test1 but zero-extends to i8. No assertions are attached to
; this function here.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}
; Stores an all-ones <8 x i1> constant through %R, then round-trips the
; constant <0,1,1,1,1,1,1,1> through a stack slot and returns it as an i8.
; The assertions expect the returned value to be folded to the immediate -2
; and the all-ones store to be materialised with kxnorw.
define i8 @conv1(<8 x i1>* %R) {
; KNL-LABEL: conv1:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: kxnorw %k0, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
; KNL-NEXT: movb $-2, %al
; KNL-NEXT: retq
;
; SKX-LABEL: conv1:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: kxnorw %k0, %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
; SKX-NEXT: movb $-2, %al
; SKX-NEXT: retq
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}
; Signed greater-than compare applied to two <4 x i1> compare results, with
; the outcome sign-extended to <4 x i32>.
; On SKX the assertions expect the i1 compare to be folded into a masked
; vpcmpgtq (knotw of one result used as the write mask); on KNL they expect
; the masks to be widened to vectors and compared with vpcmpgtd.
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; KNL-LABEL: test4:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
; KNL-NEXT: vpmovqd %zmm1, %ymm1
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test4:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0
; SKX-NEXT: knotw %k0, %k1
; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1> %x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1> %res to <4 x i32>
  ret <4 x i32> %resse
}
; Signed less-than compare applied to two <2 x i1> compare results, with
; the outcome sign-extended to <2 x i64>.
; Note that %x_gt_y actually holds x < y (icmp slt); the value name is kept
; as it appears in the test.
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test5:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
; SKX-NEXT: knotw %k0, %k1
; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: retq
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1> %x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1> %res to <2 x i64>
  ret <2 x i64> %resse
}

; ANDs a <16 x i1> argument with an alternating true/false constant and
; branches on whether the resulting i16 is zero. Both successors simply
; return. No assertions are attached to this function here.
define void @test6(<16 x i1> %mask) {
allocas:
  %a = and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; ORs a <8 x i1> argument with an alternating true/false constant and
; branches on whether the resulting i8 is zero. Both successors simply
; return.
; The assertions expect the constant to be materialised as the immediate 85
; (0x55) and OR'ed in a k-register (korw on KNL, korb on SKX).
define void @test7(<8 x i1> %mask) {
; KNL-LABEL: test7:
; KNL: ## BB#0: ## %allocas
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: movb $85, %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: retq
;
; SKX-LABEL: test7:
; SKX: ## BB#0: ## %allocas
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: movb $85, %al
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: retq
allocas:
  %a = or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; Scalar-condition select between two <16 x i1> compare results (a signed
; greater-than-zero compare of %a and an unsigned less-than-zero compare of
; %b), sign-extended to <16 x i8>.
; The assertions expect the select to be lowered as a branch, with each arm
; producing its compare result in a k-register.
define <16 x i8> @test8(<16 x i32> %a, <16 x i32> %b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB14_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
; KNL-NEXT: jmp LBB14_3
; KNL-NEXT: LBB14_1:
; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT: LBB14_3:
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB14_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
; SKX-NEXT: LBB14_1:
; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}
; Scalar-condition select between two <16 x i1> arguments.
; The assertions expect a branch that picks one of the incoming vector
; registers, followed by a conversion to a mask and back to a byte vector.
define <16 x i1> @test9(<16 x i1> %a, <16 x i1> %b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB15_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
; KNL-NEXT: jmp LBB15_3
; KNL-NEXT: LBB15_1:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: LBB15_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB15_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
; SKX-NEXT: jmp LBB15_3
; SKX-NEXT: LBB15_1:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: LBB15_3:
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}

; Same select as test9 on <8 x i1> operands. No assertions are attached to
; this function here.
define <8 x i1> @test10(<8 x i1> %a, <8 x i1> %b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1> %a, <8 x i1> %b
  ret <8 x i1> %c
}
; Scalar-condition select between two <4 x i1> arguments.
; On KNL the assertions expect a plain conditional register move of the
; incoming vectors; on SKX they expect the chosen vector to be converted to
; a mask (vpmovd2m) and back (vpmovm2d).
define <4 x i1> @test11(<4 x i1> %a, <4 x i1> %b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB17_2
; KNL-NEXT: ## BB#1:
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: LBB17_2:
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB17_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpslld $31, %xmm1, %xmm0
; SKX-NEXT: jmp LBB17_3
; SKX-NEXT: LBB17_1:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: LBB17_3:
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1> %a, <4 x i1> %b
  ret <4 x i1> %c
}
; Constant folding through a mask bitcast: element 0 of the constant 21845
; (0x5555) viewed as <16 x i1> is 1, so the select must reduce to %x.
; The assertions expect a plain move of the first argument register.
define i32 @test12(i32 %x, i32 %y) {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}
; Constant folding through a mask bitcast: element 3 of the constant 21845
; (0x5555) viewed as <16 x i1> is 0, so the select must reduce to %y.
; The assertions expect a plain move of the second argument register.
define i32 @test13(i32 %x, i32 %y) {
; CHECK-LABEL: test13:
; CHECK: ## BB#0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Inserts element 2 of the constant mask 21845 (0x5555) into position 1 of
; a constant <4 x i1> vector. No assertions are attached to this function
; here.
define <4 x i1> @test14() {
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}
; Scalar-condition select between two constant <16 x i1> masks (21845 and
; 1, both produced by bitcasting i16 constants).
; The assertions expect the choice to be made on 16-bit immediates with a
; cmovgw and only then moved into a k-register.
define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-LABEL: test15:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: movw $21845, %ax ## imm = 0x5555
; KNL-NEXT: movw $1, %cx
; KNL-NEXT: cmovgw %ax, %cx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test15:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: movw $21845, %ax ## imm = 0x5555
; SKX-NEXT: movw $1, %cx
; SKX-NEXT: cmovgw %ax, %cx
; SKX-NEXT: kmovw %ecx, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}
; Reinterprets an i64 as a <64 x i1> mask, forces element 5 to true and
; sign-extends the mask to <64 x i8>.
; On SKX the assertions expect a short k-register sequence (kmovq, a shifted
; single-bit mask, korq, vpmovm2b). On KNL they expect a long sequence that
; extracts the bits one by one and rebuilds the vector with vpinsrb.
define <64 x i8> @test16(i64 %x) {
; KNL-LABEL: test16:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: Ltmp0:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: Ltmp1:
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: Ltmp2:
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: pushq %r15
; KNL-NEXT: pushq %r14
; KNL-NEXT: pushq %r13
; KNL-NEXT: pushq %r12
; KNL-NEXT: pushq %rbx
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: Ltmp3:
; KNL-NEXT: .cfi_offset %rbx, -56
; KNL-NEXT: Ltmp4:
; KNL-NEXT: .cfi_offset %r12, -48
; KNL-NEXT: Ltmp5:
; KNL-NEXT: .cfi_offset %r13, -40
; KNL-NEXT: Ltmp6:
; KNL-NEXT: .cfi_offset %r14, -32
; KNL-NEXT: Ltmp7:
; KNL-NEXT: .cfi_offset %r15, -24
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: shrq $32, %rax
; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT: movl $271, %eax ## imm = 0x10F
; KNL-NEXT: bextrl %eax, %edi, %eax
; KNL-NEXT: movl %edi, %ecx
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: movl $257, %ecx ## imm = 0x101
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $258, %ecx ## imm = 0x102
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $259, %ecx ## imm = 0x103
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $260, %ecx ## imm = 0x104
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $261, %ecx ## imm = 0x105
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $262, %ecx ## imm = 0x106
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $263, %ecx ## imm = 0x107
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $264, %ecx ## imm = 0x108
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $265, %ecx ## imm = 0x109
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $266, %ecx ## imm = 0x10A
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $267, %ecx ## imm = 0x10B
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $268, %ecx ## imm = 0x10C
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $269, %ecx ## imm = 0x10D
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; KNL-NEXT: movl $270, %ecx ## imm = 0x10E
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
; KNL-NEXT: movl $1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
; KNL-NEXT: movq %r15, %rdx
; KNL-NEXT: shrq $17, %rdx
; KNL-NEXT: andb $1, %dl
; KNL-NEXT: je LBB22_2
; KNL-NEXT: ## BB#1:
; KNL-NEXT: movb $-1, %dl
; KNL-NEXT: LBB22_2:
; KNL-NEXT: movq %r15, %r11
; KNL-NEXT: shrq $16, %r11
; KNL-NEXT: andb $1, %r11b
; KNL-NEXT: je LBB22_4
; KNL-NEXT: ## BB#3:
; KNL-NEXT: movb $-1, %r11b
; KNL-NEXT: LBB22_4:
; KNL-NEXT: movq %r15, %r10
; KNL-NEXT: shrq $18, %r10
; KNL-NEXT: andb $1, %r10b
; KNL-NEXT: je LBB22_6
; KNL-NEXT: ## BB#5:
; KNL-NEXT: movb $-1, %r10b
; KNL-NEXT: LBB22_6:
; KNL-NEXT: movq %r15, %r9
; KNL-NEXT: shrq $19, %r9
; KNL-NEXT: andb $1, %r9b
; KNL-NEXT: je LBB22_8
; KNL-NEXT: ## BB#7:
; KNL-NEXT: movb $-1, %r9b
; KNL-NEXT: LBB22_8:
; KNL-NEXT: movq %r15, %rbx
; KNL-NEXT: shrq $20, %rbx
; KNL-NEXT: andb $1, %bl
; KNL-NEXT: je LBB22_10
; KNL-NEXT: ## BB#9:
; KNL-NEXT: movb $-1, %bl
; KNL-NEXT: LBB22_10:
; KNL-NEXT: movq %r15, %r12
; KNL-NEXT: shrq $21, %r12
; KNL-NEXT: andb $1, %r12b
; KNL-NEXT: je LBB22_12
; KNL-NEXT: ## BB#11:
; KNL-NEXT: movb $-1, %r12b
; KNL-NEXT: LBB22_12:
; KNL-NEXT: movq %r15, %r14
; KNL-NEXT: shrq $22, %r14
; KNL-NEXT: andb $1, %r14b
; KNL-NEXT: je LBB22_14
; KNL-NEXT: ## BB#13:
; KNL-NEXT: movb $-1, %r14b
; KNL-NEXT: LBB22_14:
; KNL-NEXT: movq %r15, %r8
; KNL-NEXT: shrq $23, %r8
; KNL-NEXT: andb $1, %r8b
; KNL-NEXT: je LBB22_16
; KNL-NEXT: ## BB#15:
; KNL-NEXT: movb $-1, %r8b
; KNL-NEXT: LBB22_16:
; KNL-NEXT: movq %r15, %r13
; KNL-NEXT: shrq $24, %r13
; KNL-NEXT: andb $1, %r13b
; KNL-NEXT: je LBB22_18
; KNL-NEXT: ## BB#17:
; KNL-NEXT: movb $-1, %r13b
; KNL-NEXT: LBB22_18:
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $25, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_20
; KNL-NEXT: ## BB#19:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_20:
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $26, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_22
; KNL-NEXT: ## BB#21:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_22:
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
; KNL-NEXT: movl $272, %esi ## imm = 0x110
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $27, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_24
; KNL-NEXT: ## BB#23:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_24:
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
; KNL-NEXT: movl $273, %eax ## imm = 0x111
; KNL-NEXT: bextrl %esi, %edi, %esi
; KNL-NEXT: movq %r15, %rcx
; KNL-NEXT: shrq $28, %rcx
; KNL-NEXT: andb $1, %cl
; KNL-NEXT: je LBB22_26
; KNL-NEXT: ## BB#25:
; KNL-NEXT: movb $-1, %cl
; KNL-NEXT: LBB22_26:
; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
; KNL-NEXT: bextrl %eax, %edi, %eax
; KNL-NEXT: vmovd %esi, %xmm2
; KNL-NEXT: movl $274, %esi ## imm = 0x112
; KNL-NEXT: movq %r15, %rcx
; KNL-NEXT: shrq $29, %rcx
; KNL-NEXT: andb $1, %cl
; KNL-NEXT: je LBB22_28
; KNL-NEXT: ## BB#27:
; KNL-NEXT: movb $-1, %cl
; KNL-NEXT: LBB22_28:
; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; KNL-NEXT: bextrl %esi, %edi, %eax
; KNL-NEXT: movzbl %r11b, %esi
; KNL-NEXT: movq %r15, %rcx
; KNL-NEXT: shrq $30, %rcx
; KNL-NEXT: andb $1, %cl
; KNL-NEXT: je LBB22_30
; KNL-NEXT: ## BB#29:
; KNL-NEXT: movb $-1, %cl
; KNL-NEXT: LBB22_30:
; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; KNL-NEXT: movl $275, %eax ## imm = 0x113
; KNL-NEXT: bextrl %eax, %edi, %r11d
; KNL-NEXT: movzbl %dl, %edx
; KNL-NEXT: vmovd %esi, %xmm3
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $31, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_32
; KNL-NEXT: ## BB#31:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_32:
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
; KNL-NEXT: movl $276, %eax ## imm = 0x114
; KNL-NEXT: bextrl %eax, %edi, %esi
; KNL-NEXT: movl $277, %r11d ## imm = 0x115
; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
; KNL-NEXT: movzbl %r10b, %r10d
; KNL-NEXT: movb %r15b, %al
; KNL-NEXT: shrb %al
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_34
; KNL-NEXT: ## BB#33:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_34:
; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
; KNL-NEXT: bextrl %r11d, %edi, %edx
; KNL-NEXT: movl $278, %r11d ## imm = 0x116
; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
; KNL-NEXT: movzbl %r9b, %esi
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: movq %r15, %rcx
; KNL-NEXT: shlq $63, %rcx
; KNL-NEXT: sarq $63, %rcx
; KNL-NEXT: vmovd %ecx, %xmm4
; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
; KNL-NEXT: movb %r15b, %al
; KNL-NEXT: shrb $2, %al
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_36
; KNL-NEXT: ## BB#35:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_36:
; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
; KNL-NEXT: bextrl %r11d, %edi, %edx
; KNL-NEXT: movl $279, %r9d ## imm = 0x117
; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
; KNL-NEXT: movzbl %bl, %ebx
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
; KNL-NEXT: movb %r15b, %al
; KNL-NEXT: shrb $3, %al
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_38
; KNL-NEXT: ## BB#37:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_38:
; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
; KNL-NEXT: bextrl %r9d, %edi, %edx
; KNL-NEXT: movl $280, %esi ## imm = 0x118
; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
; KNL-NEXT: movzbl %r12b, %ebx
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
; KNL-NEXT: movb %r15b, %al
; KNL-NEXT: shrb $4, %al
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_40
; KNL-NEXT: ## BB#39:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_40:
; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
; KNL-NEXT: bextrl %esi, %edi, %ecx
; KNL-NEXT: movl $281, %edx ## imm = 0x119
; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
; KNL-NEXT: movzbl %r14b, %esi
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
; KNL-NEXT: movb %r15b, %al
; KNL-NEXT: shrb $5, %al
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_42
; KNL-NEXT: ## BB#41:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_42:
; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
; KNL-NEXT: bextrl %edx, %edi, %ecx
; KNL-NEXT: movl $282, %edx ## imm = 0x11A
; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
; KNL-NEXT: movzbl %r8b, %esi
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
; KNL-NEXT: movb %r15b, %bl
; KNL-NEXT: shrb $6, %bl
; KNL-NEXT: andb $1, %bl
; KNL-NEXT: je LBB22_44
; KNL-NEXT: ## BB#43:
; KNL-NEXT: movb $-1, %bl
; KNL-NEXT: LBB22_44:
; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
; KNL-NEXT: bextrl %edx, %edi, %eax
; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
; KNL-NEXT: movzbl %r13b, %esi
; KNL-NEXT: movzbl %bl, %edx
; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
; KNL-NEXT: movb %r15b, %bl
; KNL-NEXT: shrb $7, %bl
; KNL-NEXT: je LBB22_46
; KNL-NEXT: ## BB#45:
; KNL-NEXT: movb $-1, %bl
; KNL-NEXT: LBB22_46:
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT: bextrl %ecx, %edi, %ecx
; KNL-NEXT: movl $284, %edx ## imm = 0x11C
; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
; KNL-NEXT: movzbl %al, %esi
; KNL-NEXT: movzbl %bl, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $8, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_48
; KNL-NEXT: ## BB#47:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_48:
; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
; KNL-NEXT: bextrl %edx, %edi, %ecx
; KNL-NEXT: movl $285, %edx ## imm = 0x11D
; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
; KNL-NEXT: movzbl %sil, %esi
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $9, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_50
; KNL-NEXT: ## BB#49:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_50:
; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
; KNL-NEXT: bextrl %edx, %edi, %ecx
; KNL-NEXT: movl $286, %edx ## imm = 0x11E
; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
; KNL-NEXT: movzbl %sil, %esi
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $10, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_52
; KNL-NEXT: ## BB#51:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_52:
; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
; KNL-NEXT: bextrl %edx, %edi, %edx
; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
; KNL-NEXT: movzbl %cl, %ecx
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $11, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_54
; KNL-NEXT: ## BB#53:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_54:
; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
; KNL-NEXT: shrl $31, %edi
; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
; KNL-NEXT: movzbl %cl, %ecx
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $12, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_56
; KNL-NEXT: ## BB#55:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_56:
; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
; KNL-NEXT: movzbl %cl, %ecx
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $13, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_58
; KNL-NEXT: ## BB#57:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_58:
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
; KNL-NEXT: movzbl %cl, %ecx
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
; KNL-NEXT: movq %r15, %rax
; KNL-NEXT: shrq $14, %rax
; KNL-NEXT: andb $1, %al
; KNL-NEXT: je LBB22_60
; KNL-NEXT: ## BB#59:
; KNL-NEXT: movb $-1, %al
; KNL-NEXT: LBB22_60:
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
; KNL-NEXT: shrq $15, %r15
; KNL-NEXT: andb $1, %r15b
; KNL-NEXT: je LBB22_62
; KNL-NEXT: ## BB#61:
; KNL-NEXT: movb $-1, %r15b
; KNL-NEXT: LBB22_62:
; KNL-NEXT: movzbl %r15b, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT: leaq -40(%rbp), %rsp
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
; KNL-NEXT: popq %r13
; KNL-NEXT: popq %r14
; KNL-NEXT: popq %r15
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## BB#0:
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kshiftlq $5, %k1, %k1
; SKX-NEXT: korq %k1, %k0, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
  %a = bitcast i64 %x to <64 x i1>
  %b = insertelement <64 x i1> %a, i1 true, i32 5
  %c = sext <64 x i1> %b to <64 x i8>
  ret <64 x i8> %c
}
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
|
|
; KNL-LABEL: test17:
|
|
; KNL: ## BB#0:
|
|
; KNL-NEXT: pushq %rbp
|
|
; KNL-NEXT: Ltmp8:
|
|
; KNL-NEXT: .cfi_def_cfa_offset 16
|
|
; KNL-NEXT: Ltmp9:
|
|
; KNL-NEXT: .cfi_offset %rbp, -16
|
|
; KNL-NEXT: movq %rsp, %rbp
|
|
; KNL-NEXT: Ltmp10:
|
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
|
; KNL-NEXT: pushq %r15
|
|
; KNL-NEXT: pushq %r14
|
|
; KNL-NEXT: pushq %r13
|
|
; KNL-NEXT: pushq %r12
|
|
; KNL-NEXT: pushq %rbx
|
|
; KNL-NEXT: andq $-32, %rsp
|
|
; KNL-NEXT: subq $128, %rsp
|
|
; KNL-NEXT: Ltmp11:
|
|
; KNL-NEXT: .cfi_offset %rbx, -56
|
|
; KNL-NEXT: Ltmp12:
|
|
; KNL-NEXT: .cfi_offset %r12, -48
|
|
; KNL-NEXT: Ltmp13:
|
|
; KNL-NEXT: .cfi_offset %r13, -40
|
|
; KNL-NEXT: Ltmp14:
|
|
; KNL-NEXT: .cfi_offset %r14, -32
|
|
; KNL-NEXT: Ltmp15:
|
|
; KNL-NEXT: .cfi_offset %r15, -24
|
|
; KNL-NEXT: movq %rdi, %rax
|
|
; KNL-NEXT: shrq $32, %rax
|
|
; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
|
|
; KNL-NEXT: movl %edi, %eax
|
|
; KNL-NEXT: andl $1, %eax
|
|
; KNL-NEXT: vmovd %eax, %xmm0
|
|
; KNL-NEXT: movl $257, %eax ## imm = 0x101
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $258, %eax ## imm = 0x102
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $259, %eax ## imm = 0x103
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $260, %eax ## imm = 0x104
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $261, %eax ## imm = 0x105
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $262, %eax ## imm = 0x106
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $263, %eax ## imm = 0x107
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $264, %eax ## imm = 0x108
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $265, %eax ## imm = 0x109
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $266, %eax ## imm = 0x10A
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $267, %eax ## imm = 0x10B
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $268, %eax ## imm = 0x10C
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $269, %eax ## imm = 0x10D
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $270, %eax ## imm = 0x10E
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
|
; KNL-NEXT: movl $271, %eax ## imm = 0x10F
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
|
|
; KNL-NEXT: cmpl %edx, %esi
|
|
; KNL-NEXT: setg %al
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
|
|
; KNL-NEXT: movq %r15, %rdx
|
|
; KNL-NEXT: shrq $17, %rdx
|
|
; KNL-NEXT: andb $1, %dl
|
|
; KNL-NEXT: je LBB23_2
|
|
; KNL-NEXT: ## BB#1:
|
|
; KNL-NEXT: movb $-1, %dl
|
|
; KNL-NEXT: LBB23_2:
|
|
; KNL-NEXT: movq %r15, %r11
|
|
; KNL-NEXT: shrq $16, %r11
|
|
; KNL-NEXT: andb $1, %r11b
|
|
; KNL-NEXT: je LBB23_4
|
|
; KNL-NEXT: ## BB#3:
|
|
; KNL-NEXT: movb $-1, %r11b
|
|
; KNL-NEXT: LBB23_4:
|
|
; KNL-NEXT: movq %r15, %r10
|
|
; KNL-NEXT: shrq $18, %r10
|
|
; KNL-NEXT: andb $1, %r10b
|
|
; KNL-NEXT: je LBB23_6
|
|
; KNL-NEXT: ## BB#5:
|
|
; KNL-NEXT: movb $-1, %r10b
|
|
; KNL-NEXT: LBB23_6:
|
|
; KNL-NEXT: movq %r15, %r9
|
|
; KNL-NEXT: shrq $19, %r9
|
|
; KNL-NEXT: andb $1, %r9b
|
|
; KNL-NEXT: je LBB23_8
|
|
; KNL-NEXT: ## BB#7:
|
|
; KNL-NEXT: movb $-1, %r9b
|
|
; KNL-NEXT: LBB23_8:
|
|
; KNL-NEXT: movq %r15, %rbx
|
|
; KNL-NEXT: shrq $20, %rbx
|
|
; KNL-NEXT: andb $1, %bl
|
|
; KNL-NEXT: je LBB23_10
|
|
; KNL-NEXT: ## BB#9:
|
|
; KNL-NEXT: movb $-1, %bl
|
|
; KNL-NEXT: LBB23_10:
|
|
; KNL-NEXT: movq %r15, %r12
|
|
; KNL-NEXT: shrq $21, %r12
|
|
; KNL-NEXT: andb $1, %r12b
|
|
; KNL-NEXT: je LBB23_12
|
|
; KNL-NEXT: ## BB#11:
|
|
; KNL-NEXT: movb $-1, %r12b
|
|
; KNL-NEXT: LBB23_12:
|
|
; KNL-NEXT: movq %r15, %r14
|
|
; KNL-NEXT: shrq $22, %r14
|
|
; KNL-NEXT: andb $1, %r14b
|
|
; KNL-NEXT: je LBB23_14
|
|
; KNL-NEXT: ## BB#13:
|
|
; KNL-NEXT: movb $-1, %r14b
|
|
; KNL-NEXT: LBB23_14:
|
|
; KNL-NEXT: movq %r15, %r8
|
|
; KNL-NEXT: shrq $23, %r8
|
|
; KNL-NEXT: andb $1, %r8b
|
|
; KNL-NEXT: je LBB23_16
|
|
; KNL-NEXT: ## BB#15:
|
|
; KNL-NEXT: movb $-1, %r8b
|
|
; KNL-NEXT: LBB23_16:
|
|
; KNL-NEXT: movq %r15, %r13
|
|
; KNL-NEXT: shrq $24, %r13
|
|
; KNL-NEXT: andb $1, %r13b
|
|
; KNL-NEXT: je LBB23_18
|
|
; KNL-NEXT: ## BB#17:
|
|
; KNL-NEXT: movb $-1, %r13b
|
|
; KNL-NEXT: LBB23_18:
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $25, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_20
|
|
; KNL-NEXT: ## BB#19:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_20:
|
|
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $26, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_22
|
|
; KNL-NEXT: ## BB#21:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_22:
|
|
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
|
; KNL-NEXT: movl $272, %esi ## imm = 0x110
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $27, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_24
|
|
; KNL-NEXT: ## BB#23:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_24:
|
|
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
|
; KNL-NEXT: movl $273, %eax ## imm = 0x111
|
|
; KNL-NEXT: bextrl %esi, %edi, %esi
|
|
; KNL-NEXT: movq %r15, %rcx
|
|
; KNL-NEXT: shrq $28, %rcx
|
|
; KNL-NEXT: andb $1, %cl
|
|
; KNL-NEXT: je LBB23_26
|
|
; KNL-NEXT: ## BB#25:
|
|
; KNL-NEXT: movb $-1, %cl
|
|
; KNL-NEXT: LBB23_26:
|
|
; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
|
; KNL-NEXT: bextrl %eax, %edi, %eax
|
|
; KNL-NEXT: vmovd %esi, %xmm2
|
|
; KNL-NEXT: movl $274, %esi ## imm = 0x112
|
|
; KNL-NEXT: movq %r15, %rcx
|
|
; KNL-NEXT: shrq $29, %rcx
|
|
; KNL-NEXT: andb $1, %cl
|
|
; KNL-NEXT: je LBB23_28
|
|
; KNL-NEXT: ## BB#27:
|
|
; KNL-NEXT: movb $-1, %cl
|
|
; KNL-NEXT: LBB23_28:
|
|
; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %esi, %edi, %eax
|
|
; KNL-NEXT: movzbl %r11b, %esi
|
|
; KNL-NEXT: movq %r15, %rcx
|
|
; KNL-NEXT: shrq $30, %rcx
|
|
; KNL-NEXT: andb $1, %cl
|
|
; KNL-NEXT: je LBB23_30
|
|
; KNL-NEXT: ## BB#29:
|
|
; KNL-NEXT: movb $-1, %cl
|
|
; KNL-NEXT: LBB23_30:
|
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
|
|
; KNL-NEXT: movl $275, %eax ## imm = 0x113
|
|
; KNL-NEXT: bextrl %eax, %edi, %r11d
|
|
; KNL-NEXT: movzbl %dl, %edx
|
|
; KNL-NEXT: vmovd %esi, %xmm3
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $31, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_32
|
|
; KNL-NEXT: ## BB#31:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_32:
|
|
; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
|
; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
|
; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
|
|
; KNL-NEXT: movl $276, %eax ## imm = 0x114
|
|
; KNL-NEXT: bextrl %eax, %edi, %esi
|
|
; KNL-NEXT: movl $277, %r11d ## imm = 0x115
|
|
; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
|
|
; KNL-NEXT: movzbl %r10b, %r10d
|
|
; KNL-NEXT: movb %r15b, %al
|
|
; KNL-NEXT: shrb %al
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_34
|
|
; KNL-NEXT: ## BB#33:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_34:
|
|
; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %r11d, %edi, %edx
|
|
; KNL-NEXT: movl $278, %r11d ## imm = 0x116
|
|
; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
|
|
; KNL-NEXT: movzbl %r9b, %esi
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: movq %r15, %rcx
|
|
; KNL-NEXT: shlq $63, %rcx
|
|
; KNL-NEXT: sarq $63, %rcx
|
|
; KNL-NEXT: vmovd %ecx, %xmm4
|
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movb %r15b, %al
|
|
; KNL-NEXT: shrb $2, %al
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_36
|
|
; KNL-NEXT: ## BB#35:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_36:
|
|
; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %r11d, %edi, %edx
|
|
; KNL-NEXT: movl $279, %r9d ## imm = 0x117
|
|
; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
|
|
; KNL-NEXT: movzbl %bl, %ebx
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movb %r15b, %al
|
|
; KNL-NEXT: shrb $3, %al
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_38
|
|
; KNL-NEXT: ## BB#37:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_38:
|
|
; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %r9d, %edi, %edx
|
|
; KNL-NEXT: movl $280, %esi ## imm = 0x118
|
|
; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
|
|
; KNL-NEXT: movzbl %r12b, %ebx
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movb %r15b, %al
|
|
; KNL-NEXT: shrb $4, %al
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_40
|
|
; KNL-NEXT: ## BB#39:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_40:
|
|
; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %esi, %edi, %ecx
|
|
; KNL-NEXT: movl $281, %edx ## imm = 0x119
|
|
; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
|
|
; KNL-NEXT: movzbl %r14b, %esi
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movb %r15b, %al
|
|
; KNL-NEXT: shrb $5, %al
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_42
|
|
; KNL-NEXT: ## BB#41:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_42:
|
|
; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %edx, %edi, %ecx
|
|
; KNL-NEXT: movl $282, %edx ## imm = 0x11A
|
|
; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
|
|
; KNL-NEXT: movzbl %r8b, %esi
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movb %r15b, %bl
|
|
; KNL-NEXT: shrb $6, %bl
|
|
; KNL-NEXT: andb $1, %bl
|
|
; KNL-NEXT: je LBB23_44
|
|
; KNL-NEXT: ## BB#43:
|
|
; KNL-NEXT: movb $-1, %bl
|
|
; KNL-NEXT: LBB23_44:
|
|
; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %edx, %edi, %eax
|
|
; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
|
|
; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
|
|
; KNL-NEXT: movzbl %r13b, %esi
|
|
; KNL-NEXT: movzbl %bl, %edx
|
|
; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
|
|
; KNL-NEXT: movb %r15b, %bl
|
|
; KNL-NEXT: shrb $7, %bl
|
|
; KNL-NEXT: je LBB23_46
|
|
; KNL-NEXT: ## BB#45:
|
|
; KNL-NEXT: movb $-1, %bl
|
|
; KNL-NEXT: LBB23_46:
|
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %ecx, %edi, %ecx
|
|
; KNL-NEXT: movl $284, %edx ## imm = 0x11C
|
|
; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
|
|
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
|
|
; KNL-NEXT: movzbl %al, %esi
|
|
; KNL-NEXT: movzbl %bl, %eax
|
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $8, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_48
|
|
; KNL-NEXT: ## BB#47:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_48:
|
|
; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %edx, %edi, %ecx
|
|
; KNL-NEXT: movl $285, %edx ## imm = 0x11D
|
|
; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
|
|
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
|
|
; KNL-NEXT: movzbl %sil, %esi
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $9, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_50
|
|
; KNL-NEXT: ## BB#49:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_50:
|
|
; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %edx, %edi, %ecx
|
|
; KNL-NEXT: movl $286, %edx ## imm = 0x11E
|
|
; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
|
|
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
|
|
; KNL-NEXT: movzbl %sil, %esi
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $10, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_52
|
|
; KNL-NEXT: ## BB#51:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_52:
|
|
; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
|
|
; KNL-NEXT: bextrl %edx, %edi, %edx
|
|
; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
|
|
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
|
|
; KNL-NEXT: movzbl %cl, %ecx
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $11, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_54
|
|
; KNL-NEXT: ## BB#53:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_54:
|
|
; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
|
|
; KNL-NEXT: shrl $31, %edi
|
|
; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
|
|
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
|
|
; KNL-NEXT: movzbl %cl, %ecx
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $12, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_56
|
|
; KNL-NEXT: ## BB#55:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_56:
|
|
; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
|
|
; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
|
|
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
|
|
; KNL-NEXT: movzbl %cl, %ecx
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $13, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_58
|
|
; KNL-NEXT: ## BB#57:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_58:
|
|
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
|
|
; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
|
|
; KNL-NEXT: movzbl %cl, %ecx
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
|
|
; KNL-NEXT: movq %r15, %rax
|
|
; KNL-NEXT: shrq $14, %rax
|
|
; KNL-NEXT: andb $1, %al
|
|
; KNL-NEXT: je LBB23_60
|
|
; KNL-NEXT: ## BB#59:
|
|
; KNL-NEXT: movb $-1, %al
|
|
; KNL-NEXT: LBB23_60:
|
|
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
|
; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
|
|
; KNL-NEXT: movzbl %al, %eax
|
|
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
|
|
; KNL-NEXT: shrq $15, %r15
|
|
; KNL-NEXT: andb $1, %r15b
|
|
; KNL-NEXT: je LBB23_62
|
|
; KNL-NEXT: ## BB#61:
|
|
; KNL-NEXT: movb $-1, %r15b
|
|
; KNL-NEXT: LBB23_62:
|
|
; KNL-NEXT: movzbl %r15b, %eax
|
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
|
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
|
; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
|
|
; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
|
|
; KNL-NEXT: leaq -40(%rbp), %rsp
|
|
; KNL-NEXT: popq %rbx
|
|
; KNL-NEXT: popq %r12
|
|
; KNL-NEXT: popq %r13
|
|
; KNL-NEXT: popq %r14
|
|
; KNL-NEXT: popq %r15
|
|
; KNL-NEXT: popq %rbp
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test17:
|
|
; SKX: ## BB#0:
|
|
; SKX-NEXT: kmovq %rdi, %k0
|
|
; SKX-NEXT: cmpl %edx, %esi
|
|
; SKX-NEXT: setg %al
|
|
; SKX-NEXT: andl $1, %eax
|
|
; SKX-NEXT: kmovw %eax, %k1
|
|
; SKX-NEXT: kshiftlq $5, %k1, %k1
|
|
; SKX-NEXT: korq %k1, %k0, %k0
|
|
; SKX-NEXT: vpmovm2b %k0, %zmm0
|
|
; SKX-NEXT: retq
|
|
%a = bitcast i64 %x to <64 x i1>
|
|
%b = icmp sgt i32 %y, %z
|
|
%c = insertelement <64 x i1>%a, i1 %b, i32 5
|
|
%d = sext <64 x i1>%c to <64 x i8>
|
|
ret <64 x i8>%d
|
|
}
|
|
|
|
; test18: reinterpret %a as an <8 x i1> mask, then overwrite two of its lanes
; with single elements taken from %y viewed as <16 x i1>:
;   lane 7 <- element 8 of %y
;   lane 6 <- element 9 of %y
; The expected code for both RUN lines isolates each source element with a
; shift-left / shift-right-by-15 pair on a mask register, shifts it to the
; destination lane, and ORs it into the mask built from %a.
define <8 x i1> @test18(i8 %a, i16 %y) {
|
|
; KNL-LABEL: test18:
|
|
; KNL: ## BB#0:
|
|
; KNL-NEXT: kmovw %edi, %k0
|
|
; KNL-NEXT: kmovw %esi, %k1
|
|
; KNL-NEXT: kshiftlw $7, %k1, %k2
|
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
|
; KNL-NEXT: kshiftlw $6, %k1, %k1
|
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
; KNL-NEXT: kshiftlw $6, %k1, %k1
|
|
; KNL-NEXT: korw %k1, %k0, %k0
|
|
; KNL-NEXT: kshiftlw $7, %k2, %k1
|
|
; KNL-NEXT: korw %k1, %k0, %k1
|
|
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
|
; KNL-NEXT: vpmovqw %zmm0, %xmm0
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test18:
|
|
; SKX: ## BB#0:
|
|
; SKX-NEXT: kmovb %edi, %k0
|
|
; SKX-NEXT: kmovw %esi, %k1
|
|
; SKX-NEXT: kshiftlw $6, %k1, %k2
|
|
; SKX-NEXT: kshiftrw $15, %k2, %k2
|
|
; SKX-NEXT: kshiftlw $7, %k1, %k1
|
|
; SKX-NEXT: kshiftrw $15, %k1, %k1
|
|
; SKX-NEXT: kshiftlb $7, %k1, %k1
|
|
; SKX-NEXT: kshiftlb $6, %k2, %k2
|
|
; SKX-NEXT: korb %k2, %k0, %k0
|
|
; SKX-NEXT: korb %k1, %k0, %k0
|
|
; SKX-NEXT: vpmovm2w %k0, %xmm0
|
|
; SKX-NEXT: retq
|
|
; Destination mask (8 lanes) and source mask (16 lanes).
%b = bitcast i8 %a to <8 x i1>
|
|
%b1 = bitcast i16 %y to <16 x i1>
|
|
; Pull out the two source elements.
%el1 = extractelement <16 x i1>%b1, i32 8
|
|
%el2 = extractelement <16 x i1>%b1, i32 9
|
|
; Insert them into lanes 7 and 6 respectively; the remaining lanes keep %a's bits.
%c = insertelement <8 x i1>%b, i1 %el1, i32 7
|
|
%d = insertelement <8 x i1>%c, i1 %el2, i32 6
|
|
ret <8 x i1>%d
|
|
}
|
|
; test21: zeroing select on 32 x i16 -- each lane of %x is kept where the
; matching %mask element is true and replaced by 0 where it is false.
; Expected code for -mcpu=knl: the mask is handled as two 16-lane halves; each
; half is widened from bytes to words, bit 0 is spread across the word with a
; shift-left-15 / arithmetic-shift-right-15 pair, and the result is ANDed with
; the matching half of %x.
; Expected code for -mcpu=skx: the mask bytes are converted to a mask register
; and applied with a single zero-masked vmovdqu16.
define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
|
|
; KNL-LABEL: test21:
|
|
; KNL: ## BB#0:
|
|
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
|
; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
|
|
; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
|
|
; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
|
|
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
|
|
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
|
|
; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
|
|
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
|
|
; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test21:
|
|
; SKX: ## BB#0:
|
|
; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
|
|
; SKX-NEXT: vpmovb2m %ymm1, %k1
|
|
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
|
|
; SKX-NEXT: retq
|
|
; Lanes with a false mask element take the zeroinitializer operand.
%ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %ret
|
|
}
|
|
|
|
; test22: store a <4 x i1> value to memory.
; Expected code for -mcpu=skx: the vector is converted to a mask register
; (vpslld $31 + vpmovd2m) and written with one kmovb store.
; Expected code for -mcpu=knl: each element is extracted, masked with 1 and
; written with its own byte store, all four to the same address (%rdi).
; NOTE(review): as recorded, the later byte stores overwrite the earlier ones,
; so only the element-0 value written last remains at (%rdi). That looks like
; elements are dropped rather than packed -- confirm whether this is the
; intended lowering or simply the current codegen being pinned by this test.
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
|
|
; KNL-LABEL: test22:
|
|
; KNL: ## BB#0:
|
|
; KNL-NEXT: vpextrd $3, %xmm0, %eax
|
|
; KNL-NEXT: andl $1, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: vpextrd $2, %xmm0, %eax
|
|
; KNL-NEXT: andl $1, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: vpextrd $1, %xmm0, %eax
|
|
; KNL-NEXT: andl $1, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: vmovd %xmm0, %eax
|
|
; KNL-NEXT: andl $1, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test22:
|
|
; SKX: ## BB#0:
|
|
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
|
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
|
; SKX-NEXT: kmovb %k0, (%rdi)
|
|
; SKX-NEXT: retq
|
|
store <4 x i1> %a, <4 x i1>* %addr
|
|
ret void
|
|
}
|
|
|
|
; test23: store a <2 x i1> value to memory (2-lane counterpart of test22).
; Expected code for -mcpu=skx: the vector is converted to a mask register
; (vpsllq $63 + vpmovq2m) and written with one kmovb store.
; Expected code for -mcpu=knl: each of the two elements is extracted, masked
; with 1 and written with its own byte store, both to the same address (%rdi).
; NOTE(review): as recorded, the second store overwrites the first, so only the
; element-0 value remains at (%rdi) -- confirm whether this is the intended
; lowering or simply the current codegen being pinned by this test.
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
|
|
; KNL-LABEL: test23:
|
|
; KNL: ## BB#0:
|
|
; KNL-NEXT: vpextrq $1, %xmm0, %rax
|
|
; KNL-NEXT: andl $1, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: vmovq %xmm0, %rax
|
|
; KNL-NEXT: andl $1, %eax
|
|
; KNL-NEXT: movb %al, (%rdi)
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: test23:
|
|
; SKX: ## BB#0:
|
|
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
|
|
; SKX-NEXT: vpmovq2m %xmm0, %k0
|
|
; SKX-NEXT: kmovb %k0, (%rdi)
|
|
; SKX-NEXT: retq
|
|
store <2 x i1> %a, <2 x i1>* %addr
|
|
ret void
|
|
}
|