; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=KNL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_ONLY %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+avx512vbmi | FileCheck --check-prefix=CHECK --check-prefix=SKX --check-prefix=SKX_VBMI %s

; Insert a float loaded from %br into element 1 and the scalar %y into
; element 14 of a <16 x float>.
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; CHECK-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm2
; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %rrr = load float, float* %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; Insert a double loaded from %br into element 1 and the scalar %y into
; element 6 of an <8 x double>.
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; CHECK-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm2
; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %rrr = load double, double* %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; Copy element 4 of a <16 x float> into element 1 of the same vector.
define <16 x float> @test3(<16 x float> %x) nounwind {
; CHECK-LABEL: test3:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; Copy element 4 of an <8 x i64> into element 1 of the same vector.
define <8 x i64> @test4(<8 x i64> %x) nounwind {
; CHECK-LABEL: test4:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; CHECK-NEXT: vmovq %xmm1, %rax
; CHECK-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; Extract element 3 of a <4 x float> and return its bit pattern as an i32.
define i32 @test5(<4 x float> %x) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractps $3, %xmm0, %eax
; CHECK-NEXT: retq
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; Extract element 3 of a <4 x float> and store it directly to %out.
define void @test6(<4 x float> %x, float* %out) nounwind {
; CHECK-LABEL: test6:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractps $3, %xmm0, (%rdi)
; CHECK-NEXT: retq
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, float* %out, align 4
  ret void
}

; Extract an element of a <16 x float> at the run-time index %ind. The expected
; code spills the vector to a 64-byte aligned stack slot and masks the index
; with 15 before the scalar load.
define float @test7(<16 x float> %x, i32 %ind) nounwind {
; CHECK-LABEL: test7:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; Extract an element of an <8 x double> at the run-time index %ind. The expected
; code spills the vector to a 64-byte aligned stack slot and masks the index
; with 7 before the scalar load.
define double @test8(<8 x double> %x, i32 %ind) nounwind {
; CHECK-LABEL: test8:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; Extract an element of an <8 x float> at the run-time index %ind. The expected
; code spills the vector to a 32-byte aligned stack slot and masks the index
; with 7 before the scalar load.
define float @test9(<8 x float> %x, i32 %ind) nounwind {
; CHECK-LABEL: test9:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; Extract an element of a <16 x i32> at the run-time index %ind. The expected
; code spills the vector to a 64-byte aligned stack slot, masks the index with
; 15 and reloads the element with a scaled integer load.
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; CHECK-LABEL: test10:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movl (%rsp,%rdi,4), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; Branch on element 4 of a <16 x i1> unsigned-less-than compare: block A
; returns %b unchanged, block B returns %b + %a.
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test11:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: je LBB10_2
; KNL-NEXT: ## %bb.1: ## %A
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL-NEXT: retq
; KNL-NEXT: LBB10_2: ## %B
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrw $4, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je LBB10_2
; SKX-NEXT: ## %bb.1: ## %A
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
; SKX-NEXT: LBB10_2: ## %B
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
A:
  ret <16 x i32>%b
B:
  %c = add <16 x i32>%b, %a
  ret <16 x i32>%c
}

; Select between the scalars %a1 and %b1 using element 0 of a <16 x i64>
; signed-less-than compare.
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test12:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test12:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert the result of a scalar unsigned compare into element 0 of a constant
; <16 x i1> and return the resulting mask as an i16.
define i16 @test13(i32 %a, i32 %b) {
; KNL-LABEL: test13:
; KNL: ## %bb.0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: movw $-4, %cx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %ax killed %ax killed %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test13:
; SKX: ## %bb.0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: movw $-4, %cx
; SKX-NEXT: kmovd %ecx, %k0
; SKX-NEXT: kshiftrw $1, %k0, %k0
; SKX-NEXT: kshiftlw $1, %k0, %k0
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %ax killed %ax killed %eax
; SKX-NEXT: retq
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Select between the scalars %a1 and %b1 using element 4 of an <8 x i64>
; signed-less-than compare.
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test14:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test14:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT: kshiftrb $4, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Insert an i1 loaded from %addr into element 10 of an undef <16 x i1> and
; return the mask as an i16.
define i16 @test15(i1 *%addr) {
; CHECK-LABEL: test15:
; CHECK: ## %bb.0:
; CHECK-NEXT: movb (%rdi), %al
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: movw $-1, %ax
; CHECK-NEXT: cmovew %cx, %ax
; CHECK-NEXT: retq
  %x = load i1 , i1 * %addr, align 1
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Insert an i1 loaded from %addr into element 10 of the <16 x i1> mask formed
; from %a and return the updated mask as an i16.
define i16 @test16(i1 *%addr, i16 %a) {
; KNL-LABEL: test16:
; KNL: ## %bb.0:
; KNL-NEXT: movb (%rdi), %al
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftrw $10, %k0, %k2
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %ax killed %ax killed %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kshiftrw $10, %k1, %k2
; SKX-NEXT: kxorw %k0, %k2, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $5, %k0, %k0
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %ax killed %ax killed %eax
; SKX-NEXT: retq
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Insert an i1 loaded from %addr into element 4 of the <8 x i1> mask formed
; from %a and return the updated mask as an i8.
define i8 @test17(i1 *%addr, i8 %a) {
; KNL-LABEL: test17:
; KNL: ## %bb.0:
; KNL-NEXT: movb (%rdi), %al
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftrw $4, %k0, %k2
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kshiftrb $4, %k1, %k2
; SKX-NEXT: kxorb %k0, %k2, %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: kshiftrb $3, %k0, %k0
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq
  %x = load i1 , i1 * %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}

; Extract elements 1 and 3 of an <8 x i64>: element 1 is returned, element 3 is
; stored to %dst with align 1.
define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; CHECK-LABEL: extract_v8i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <8 x i64> %x, i32 1
  %r2 = extractelement <8 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Extract elements 1 and 3 of a <4 x i64>: element 1 is returned, element 3 is
; stored to %dst with align 1.
define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; CHECK-LABEL: extract_v4i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <4 x i64> %x, i32 1
  %r2 = extractelement <4 x i64> %x, i32 3
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Extract both elements of a <2 x i64>: element 0 is returned, element 1 is
; stored to %dst with align 1.
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
; CHECK-LABEL: extract_v2i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT: retq
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

; Extract elements 1 and 5 of a <16 x i32>: element 1 is returned, element 5 is
; stored to %dst with align 1.
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; CHECK-LABEL: extract_v16i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractps $1, %xmm0, %eax
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <16 x i32> %x, i32 1
  %r2 = extractelement <16 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Extract elements 1 and 5 of an <8 x i32>: element 1 is returned, element 5 is
; stored to %dst with align 1.
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; CHECK-LABEL: extract_v8i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractps $1, %xmm0, %eax
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vextractps $1, %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <8 x i32> %x, i32 1
  %r2 = extractelement <8 x i32> %x, i32 5
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Extract elements 1 and 3 of a <4 x i32>: element 1 is returned, element 3 is
; stored to %dst with align 1.
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; CHECK-LABEL: extract_v4i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractps $1, %xmm0, %eax
; CHECK-NEXT: vextractps $3, %xmm0, (%rdi)
; CHECK-NEXT: retq
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

; Extract elements 1 and 9 of a <32 x i16>: element 1 is returned, element 9 is
; stored to %dst with align 1.
define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; CHECK-LABEL: extract_v32i16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrw $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <32 x i16> %x, i32 1
  %r2 = extractelement <32 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Extract elements 1 and 9 of a <16 x i16>: element 1 is returned, element 9 is
; stored to %dst with align 1.
define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; CHECK-LABEL: extract_v16i16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrw $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <16 x i16> %x, i32 1
  %r2 = extractelement <16 x i16> %x, i32 9
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Extract elements 1 and 3 of an <8 x i16>: element 1 is returned, element 3 is
; stored to %dst with align 1.
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
; CHECK-LABEL: extract_v8i16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-NEXT: vpextrw $3, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
; CHECK-NEXT: retq
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

; Extract elements 1 and 17 of a <64 x i8>: element 1 is returned, element 17
; is stored to %dst with align 1.
define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; CHECK-LABEL: extract_v64i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrb $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrb $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <64 x i8> %x, i32 1
  %r2 = extractelement <64 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Extract elements 1 and 17 of a <32 x i8>: element 1 is returned, element 17
; is stored to %dst with align 1.
define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; CHECK-LABEL: extract_v32i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrb $1, %xmm0, %eax
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpextrb $1, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %r1 = extractelement <32 x i8> %x, i32 1
  %r2 = extractelement <32 x i8> %x, i32 17
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Extract elements 1 and 3 of a <16 x i8>: element 1 is returned, element 3 is
; stored to %dst with align 1.
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
; CHECK-LABEL: extract_v16i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpextrb $1, %xmm0, %eax
; CHECK-NEXT: vpextrb $3, %xmm0, (%rdi)
; CHECK-NEXT: ## kill: def %al killed %al killed %eax
; CHECK-NEXT: retq
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

; Insert an i64 loaded from %ptr into element 1 and the scalar %y into
; element 3 of an <8 x i64>.
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
; CHECK-LABEL: insert_v8i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; CHECK-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
  ret <8 x i64> %r2
}

; Insert an i64 loaded from %ptr into element 1 and the scalar %y into
; element 3 of a <4 x i64>.
define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
; CHECK-LABEL: insert_v4i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
  ret <4 x i64> %r2
}

; Insert an i64 loaded from %ptr into element 1 and the scalar %y into
; element 0 of a <2 x i64>.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; CHECK-LABEL: insert_v2i64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm0
; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

; Insert an i32 loaded from %ptr into element 1 and the scalar %y into
; element 5 of a <16 x i32>.
define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
; CHECK-LABEL: insert_v16i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
; CHECK-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
  ret <16 x i32> %r2
}

; Insert an i32 loaded from %ptr into element 1 and the scalar %y into
; element 5 of an <8 x i32>.
define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
; CHECK-LABEL: insert_v8i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
  ret <8 x i32> %r2
}

; Insert an i32 loaded from %ptr into element 1 and the scalar %y into
; element 3 of a <4 x i32>.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
; CHECK-LABEL: insert_v4i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm0
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
; CHECK-NEXT: retq
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

; Insert an i16 loaded from %ptr into element 1 and the scalar %y into
; element 9 of a <32 x i16>. The two RUN configurations expect different
; instruction sequences, so each has its own assertion group below.
define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
; KNL-LABEL: insert_v32i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm2
; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
; KNL-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_v32i16:
; SKX: ## %bb.0:
; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
; SKX-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %val = load i16, i16* %ptr
  %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
  ret <32 x i16> %r2
}

; Inserts an i16 loaded from %ptr at element 1 and the scalar %y at element 9
; of a <16 x i16>. The checks expect %y to be placed with vpinsrw $1 in the
; upper 128 bits (vextracti128 $1 / vinserti128 $1).
define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: insert_v16i16:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
|
|
|
|
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; CHECK-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-10-08 20:55:01 +08:00
|
|
|
%val = load i16, i16* %ptr
|
|
|
|
%r1 = insertelement <16 x i16> %x, i16 %val, i32 1
|
|
|
|
%r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
|
|
|
|
ret <16 x i16> %r2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts an i16 loaded from %ptr at element 1 and the scalar %y at element 5
; of a <8 x i16>. The checks expect one vpinsrw per insertion.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: insert_v8i16:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-10-08 20:55:01 +08:00
|
|
|
%val = load i16, i16* %ptr
|
|
|
|
%r1 = insertelement <8 x i16> %x, i16 %val, i32 1
|
|
|
|
%r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
|
|
|
|
ret <8 x i16> %r2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts an i8 loaded from %ptr at element 1 and the scalar %y at element 50
; of a <64 x i8>. The KNL and SKX prefixes carry separate expected sequences;
; both place %y with vpinsrb $2 into the last 128-bit chunk of the vector.
define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
|
|
|
|
; KNL-LABEL: insert_v64i8:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2015-10-08 20:55:01 +08:00
|
|
|
; KNL-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm2
|
|
|
|
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $2, %edi, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: insert_v64i8:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2015-10-08 20:55:01 +08:00
|
|
|
; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
|
2017-02-13 07:49:49 +08:00
|
|
|
; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
|
|
|
|
; SKX-NEXT: vextracti32x4 $3, %zmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vinserti32x4 $3, %xmm0, %zmm1, %zmm0
|
2015-10-08 20:55:01 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%val = load i8, i8* %ptr
|
|
|
|
%r1 = insertelement <64 x i8> %x, i8 %val, i32 1
|
|
|
|
%r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
|
|
|
|
ret <64 x i8> %r2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts an i8 loaded from %ptr at element 1 and the scalar %y at element 17
; of a <32 x i8>. The checks expect %y to be placed with vpinsrb $1 in the
; upper 128 bits (vextracti128 $1 / vinserti128 $1).
define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: insert_v32i8:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
|
|
|
|
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; CHECK-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-10-08 20:55:01 +08:00
|
|
|
%val = load i8, i8* %ptr
|
|
|
|
%r1 = insertelement <32 x i8> %x, i8 %val, i32 1
|
|
|
|
%r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
|
|
|
|
ret <32 x i8> %r2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts an i8 loaded from %ptr at element 3 and the scalar %y at element 10
; of a <16 x i8>. The checks expect one vpinsrb per insertion.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: insert_v16i8:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-10-08 20:55:01 +08:00
|
|
|
%val = load i8, i8* %ptr
|
|
|
|
%r1 = insertelement <16 x i8> %x, i8 %val, i32 3
|
|
|
|
%r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
|
|
|
|
ret <16 x i8> %r2
|
|
|
|
}
|
|
|
|
|
2015-09-20 14:52:42 +08:00
|
|
|
; Inserts the scalar %y at element 1 of a <8 x i64>. The checks expect a
; vpinsrq on the low xmm followed by vinserti32x4 $0 back into the zmm.
define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
|
2017-08-17 23:40:25 +08:00
|
|
|
; CHECK-LABEL: test_insert_128_v8i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-17 23:40:25 +08:00
|
|
|
; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-09-20 14:52:42 +08:00
|
|
|
%r = insertelement <8 x i64> %x, i64 %y, i32 1
|
|
|
|
ret <8 x i64> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts the scalar %y at element 1 of a <16 x i32>. The checks expect a
; vpinsrd on the low xmm followed by vinserti32x4 $0 back into the zmm.
define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: test_insert_128_v16i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-09-20 14:52:42 +08:00
|
|
|
%r = insertelement <16 x i32> %x, i32 %y, i32 1
|
|
|
|
ret <16 x i32> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts the scalar double %y at element 1 of a <8 x double>. The checks
; expect a vmovlhps combining the low halves, then vinsertf32x4 $0 back into
; the zmm.
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
|
2017-08-17 23:40:25 +08:00
|
|
|
; CHECK-LABEL: test_insert_128_v8f64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-09-18 12:40:58 +08:00
|
|
|
; CHECK-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
|
2017-08-17 23:40:25 +08:00
|
|
|
; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-09-20 14:52:42 +08:00
|
|
|
%r = insertelement <8 x double> %x, double %y, i32 1
|
|
|
|
ret <8 x double> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts the scalar float %y at element 1 of a <16 x float>. The checks
; expect a vinsertps on the low xmm followed by vinsertf32x4 $0 back into the
; zmm.
define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: test_insert_128_v16f32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
|
|
|
|
; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-09-20 14:52:42 +08:00
|
|
|
%r = insertelement <16 x float> %x, float %y, i32 1
|
|
|
|
ret <16 x float> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts the scalar %y at element 10 of a <16 x i16>. The checks expect the
; upper 128 bits to be extracted, updated with vpinsrw $2, and reinserted.
define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: test_insert_128_v16i16:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; CHECK-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-09-20 14:52:42 +08:00
|
|
|
%r = insertelement <16 x i16> %x, i16 %y, i32 10
|
|
|
|
ret <16 x i16> %r
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts the scalar %y at element 20 of a <32 x i8>. The checks expect the
; upper 128 bits to be extracted, updated with vpinsrb $4, and reinserted.
define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: test_insert_128_v32i8:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; CHECK-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
|
|
|
|
; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; CHECK-NEXT: retq
|
2015-09-20 14:52:42 +08:00
|
|
|
%r = insertelement <32 x i8> %x, i8 %y, i32 20
|
|
|
|
ret <32 x i8> %r
|
|
|
|
}
|
2016-08-11 20:13:46 +08:00
|
|
|
|
2016-08-14 13:25:07 +08:00
|
|
|
; Builds a <32 x i1> mask from the lane-wise unsigned compare %x < %y,
; overwrites element 4 with the scalar compare %a < %b, and returns the mask
; bitcast to i32. The KNL and SKX prefixes carry separate expected sequences:
; the KNL one goes through byte vectors and the stack, the SKX one stays in
; mask registers (kshift/kxor).
define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> %y) {
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-LABEL: test_insertelement_v32i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: pushq %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; KNL-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; KNL-NEXT: movq %rsp, %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; KNL-NEXT: andq $-32, %rsp
|
|
|
|
; KNL-NEXT: subq $32, %rsp
|
2017-06-28 19:23:31 +08:00
|
|
|
; KNL-NEXT: xorl %eax, %eax
|
|
|
|
; KNL-NEXT: cmpl %esi, %edi
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: setb %al
|
2017-12-14 16:26:00 +08:00
|
|
|
; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k1
|
2017-12-22 10:30:26 +08:00
|
|
|
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
2017-12-14 16:26:00 +08:00
|
|
|
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k1
|
2017-12-22 10:30:26 +08:00
|
|
|
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
2017-12-14 16:26:00 +08:00
|
|
|
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
2017-07-28 01:47:01 +08:00
|
|
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
2017-02-12 07:23:11 +08:00
|
|
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, (%rsp)
|
|
|
|
; KNL-NEXT: movl (%rsp), %eax
|
|
|
|
; KNL-NEXT: movq %rbp, %rsp
|
|
|
|
; KNL-NEXT: popq %rbp
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: vzeroupper
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-LABEL: test_insertelement_v32i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-NEXT: cmpl %esi, %edi
|
2017-03-13 02:28:48 +08:00
|
|
|
; SKX-NEXT: setb %al
|
2017-05-19 20:35:15 +08:00
|
|
|
; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k0
|
|
|
|
; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k1
|
|
|
|
; SKX-NEXT: kunpckwd %k0, %k1, %k0
|
2017-12-08 08:16:09 +08:00
|
|
|
; SKX-NEXT: kshiftrd $4, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %eax, %k2
|
|
|
|
; SKX-NEXT: kxord %k2, %k1, %k1
|
|
|
|
; SKX-NEXT: kshiftld $31, %k1, %k1
|
|
|
|
; SKX-NEXT: kshiftrd $27, %k1, %k1
|
|
|
|
; SKX-NEXT: kxord %k0, %k1, %k0
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2017-03-03 17:03:24 +08:00
|
|
|
; SKX-NEXT: vzeroupper
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp_res_i1 = icmp ult i32 %a, %b
|
|
|
|
%cmp_cmp_vec = icmp ult <32 x i32> %x, %y
|
|
|
|
%maskv = insertelement <32 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 4
|
|
|
|
%res = bitcast <32 x i1> %maskv to i32
|
|
|
|
ret i32 %res
|
|
|
|
}
|
|
|
|
|
|
|
|
; Builds a <4 x i1> mask from the lane-wise unsigned compare %x < %y and
; overwrites element 2 with the scalar compare %a < %b. The mask is then
; widened to <8 x i1> by a shufflevector whose upper four lanes select index 4
; (the first lane of the undef operand) and bitcast to i8.
; The KNL and SKX prefixes carry separate expected sequences.
define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) {
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-LABEL: test_iinsertelement_v4i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: cmpl %esi, %edi
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: setb %al
|
2017-07-11 19:51:49 +08:00
|
|
|
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: vpextrb $4, %xmm0, %ecx
|
2017-12-08 08:16:09 +08:00
|
|
|
; KNL-NEXT: kmovw %ecx, %k0
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
|
2017-12-08 04:10:04 +08:00
|
|
|
; KNL-NEXT: andl $1, %ecx
|
2017-12-08 08:16:09 +08:00
|
|
|
; KNL-NEXT: kmovw %ecx, %k1
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k2
|
|
|
|
; KNL-NEXT: kshiftlw $1, %k2, %k2
|
|
|
|
; KNL-NEXT: korw %k1, %k2, %k1
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k1, %k2
|
|
|
|
; KNL-NEXT: kxorw %k0, %k2, %k0
|
|
|
|
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $14, %k0, %k0
|
|
|
|
; KNL-NEXT: kxorw %k1, %k0, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $2, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %eax, %k2
|
|
|
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
|
|
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
|
|
; KNL-NEXT: kshiftrw $13, %k1, %k1
|
|
|
|
; KNL-NEXT: kxorw %k0, %k1, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $3, %k0, %k1
|
2017-05-19 20:35:15 +08:00
|
|
|
; KNL-NEXT: vpextrb $12, %xmm0, %eax
|
2017-12-08 08:16:09 +08:00
|
|
|
; KNL-NEXT: kmovw %eax, %k2
|
|
|
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
|
|
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
|
|
; KNL-NEXT: kshiftrw $12, %k1, %k1
|
|
|
|
; KNL-NEXT: kxorw %k0, %k1, %k0
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
2017-12-07 18:40:31 +08:00
|
|
|
; KNL-NEXT: ## kill: def %al killed %al killed %eax
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-LABEL: test_iinsertelement_v4i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-NEXT: cmpl %esi, %edi
|
2017-03-13 02:28:48 +08:00
|
|
|
; SKX-NEXT: setb %al
|
2017-05-19 20:35:15 +08:00
|
|
|
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
|
2017-12-08 08:16:09 +08:00
|
|
|
; SKX-NEXT: kshiftrw $2, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %eax, %k2
|
|
|
|
; SKX-NEXT: kxorw %k2, %k1, %k1
|
|
|
|
; SKX-NEXT: kshiftlw $15, %k1, %k1
|
|
|
|
; SKX-NEXT: kshiftrw $13, %k1, %k1
|
|
|
|
; SKX-NEXT: kxorw %k0, %k1, %k0
|
2017-03-29 00:35:29 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2017-12-07 18:40:31 +08:00
|
|
|
; SKX-NEXT: ## kill: def %al killed %al killed %eax
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp_res_i1 = icmp ult i32 %a, %b
|
|
|
|
%cmp_cmp_vec = icmp ult <4 x i32> %x, %y
|
|
|
|
%maskv = insertelement <4 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 2
|
|
|
|
%res0 = shufflevector <4 x i1> %maskv, <4 x i1> undef , <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
|
|
|
|
%res = bitcast <8 x i1> %res0 to i8
|
|
|
|
ret i8 %res
|
|
|
|
}
|
|
|
|
|
|
|
|
; Builds a <2 x i1> mask from the lane-wise unsigned compare %x < %y and
; overwrites element 1 with the scalar compare %a < %b. The mask is then
; widened to <8 x i1> by a shufflevector whose upper six lanes select index 2
; (the first lane of the undef operand) and bitcast to i8.
; The KNL and SKX prefixes carry separate expected sequences.
define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) {
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-LABEL: test_iinsertelement_v2i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: cmpl %esi, %edi
|
2017-03-13 02:28:48 +08:00
|
|
|
; KNL-NEXT: setb %al
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
2017-05-19 20:35:15 +08:00
|
|
|
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
|
2017-12-08 04:10:04 +08:00
|
|
|
; KNL-NEXT: andl $1, %ecx
|
|
|
|
; KNL-NEXT: kmovw %ecx, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
|
|
|
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
2017-12-08 08:16:09 +08:00
|
|
|
; KNL-NEXT: korw %k0, %k1, %k0
|
|
|
|
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
|
|
|
; KNL-NEXT: kmovw %eax, %k2
|
|
|
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
|
|
|
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
|
|
|
; KNL-NEXT: kshiftrw $14, %k1, %k1
|
|
|
|
; KNL-NEXT: kxorw %k0, %k1, %k0
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: kmovw %k0, %eax
|
2017-12-07 18:40:31 +08:00
|
|
|
; KNL-NEXT: ## kill: def %al killed %al killed %eax
|
2016-08-26 06:48:11 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-LABEL: test_iinsertelement_v2i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-NEXT: cmpl %esi, %edi
|
2017-03-13 02:28:48 +08:00
|
|
|
; SKX-NEXT: setb %al
|
2017-05-19 20:35:15 +08:00
|
|
|
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
|
2017-12-08 08:16:09 +08:00
|
|
|
; SKX-NEXT: kshiftrw $1, %k0, %k1
|
|
|
|
; SKX-NEXT: kmovd %eax, %k2
|
|
|
|
; SKX-NEXT: kxorw %k2, %k1, %k1
|
|
|
|
; SKX-NEXT: kshiftlw $15, %k1, %k1
|
|
|
|
; SKX-NEXT: kshiftrw $14, %k1, %k1
|
|
|
|
; SKX-NEXT: kxorw %k0, %k1, %k0
|
2017-03-29 00:35:29 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2017-12-07 18:40:31 +08:00
|
|
|
; SKX-NEXT: ## kill: def %al killed %al killed %eax
|
2016-08-14 13:25:07 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%cmp_res_i1 = icmp ult i32 %a, %b
|
|
|
|
%cmp_cmp_vec = icmp ult <2 x i64> %x, %y
|
|
|
|
%maskv = insertelement <2 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 1
|
|
|
|
%res0 = shufflevector <2 x i1> %maskv, <2 x i1> undef , <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
|
|
|
|
%res = bitcast <8 x i1> %res0 to i8
|
|
|
|
ret i8 %res
|
|
|
|
}
|
|
|
|
|
2016-08-11 20:13:46 +08:00
|
|
|
; Compares %a > %b (unsigned, per lane), extracts element 0 of the <2 x i1>
; result and returns 3 when it is set, 4 otherwise. Both prefixes expect the
; select to be emitted as arithmetic (movb $4 / subb of the masked bit).
define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|
|
|
; KNL-LABEL: test_extractelement_v2i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2016-08-11 20:13:46 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
[DAGCombiner] allow transforming (select Cond, C +/- 1, C) to (add(ext Cond), C)
select Cond, C +/- 1, C --> add(ext Cond), C -- with a target hook.
This is part of the ongoing process to obsolete D24480. The motivation is to
canonicalize to select IR in InstCombine whenever possible, so we need to have a way to
undo that easily in codegen.
PowerPC is an obvious winner for this kind of transform because it has fast and complete
bit-twiddling abilities but generally lousy conditional execution perf (although this might
have changed in recent implementations).
x86 also sees some wins, but the effect is limited because these transforms already mostly
exist in its target-specific combineSelectOfTwoConstants(). The fact that we see any x86
changes just shows that that code is a mess of special-case holes. We may be able to remove
some of that logic now.
My guess is that other targets will want to enable this hook for most cases. The likely
follow-ups would be to add value type and/or the constants themselves as parameters for the
hook. As the tests in select_const.ll show, we can transform any select-of-constants to
math/logic, but the general transform for any 2 constants needs one more instruction
(multiply or 'and').
ARM is one target that I think may not want this for most cases. I see infinite loops there
because it wants to use selects to enable conditionally executed instructions.
Differential Revision: https://reviews.llvm.org/D30537
llvm-svn: 296977
2017-03-05 03:18:09 +08:00
|
|
|
; KNL-NEXT: vpextrb $0, %xmm0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; KNL-NEXT: andb $1, %al
|
|
|
|
; KNL-NEXT: movb $4, %cl
|
|
|
|
; KNL-NEXT: subb %al, %cl
|
|
|
|
; KNL-NEXT: movzbl %cl, %eax
|
2016-08-11 20:13:46 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_extractelement_v2i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
|
2017-03-29 14:55:28 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; SKX-NEXT: andb $1, %al
|
|
|
|
; SKX-NEXT: movb $4, %cl
|
|
|
|
; SKX-NEXT: subb %al, %cl
|
|
|
|
; SKX-NEXT: movzbl %cl, %eax
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <2 x i64> %a, %b
|
|
|
|
%t2 = extractelement <2 x i1> %t1, i32 0
|
|
|
|
%res = select i1 %t2, i8 3, i8 4
|
|
|
|
ret i8 %res
|
|
|
|
}
|
|
|
|
|
2017-03-01 02:02:38 +08:00
|
|
|
; Compares %a > %b (unsigned, per lane), extracts element 0 of the <2 x i1>
; result, sign-extends it to i8 and adds 4, so the result is 3 when the bit is
; set and 4 otherwise. Both prefixes expect movb $4 / subb of the masked bit.
define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) {
|
|
|
|
; KNL-LABEL: extractelement_v2i1_alt:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2017-03-01 02:02:38 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpextrb $0, %xmm0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; KNL-NEXT: andb $1, %al
|
|
|
|
; KNL-NEXT: movb $4, %cl
|
|
|
|
; KNL-NEXT: subb %al, %cl
|
|
|
|
; KNL-NEXT: movzbl %cl, %eax
|
2017-03-01 02:02:38 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: extractelement_v2i1_alt:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2017-03-01 02:02:38 +08:00
|
|
|
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
|
2017-03-29 14:55:28 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; SKX-NEXT: andb $1, %al
|
|
|
|
; SKX-NEXT: movb $4, %cl
|
|
|
|
; SKX-NEXT: subb %al, %cl
|
|
|
|
; SKX-NEXT: movzbl %cl, %eax
|
2017-03-01 02:02:38 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <2 x i64> %a, %b
|
|
|
|
%t2 = extractelement <2 x i1> %t1, i32 0
|
|
|
|
%sext = sext i1 %t2 to i8
|
|
|
|
%res = add i8 %sext, 4
|
|
|
|
ret i8 %res
|
|
|
|
}
|
|
|
|
|
2016-08-11 20:13:46 +08:00
|
|
|
; Compares %a > %b (unsigned, per lane), extracts element 3 of the <4 x i1>
; result and zero-extends it to i8. The KNL prefix expects vpextrd $3; the
; SKX prefix expects a kshiftrw $3 on the mask register.
define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
|
|
|
; KNL-LABEL: test_extractelement_v4i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2017-07-11 19:51:49 +08:00
|
|
|
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
|
2016-08-11 20:13:46 +08:00
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpextrd $3, %xmm0, %eax
|
|
|
|
; KNL-NEXT: andl $1, %eax
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_extractelement_v4i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
|
2017-12-15 02:35:25 +08:00
|
|
|
; SKX-NEXT: kshiftrw $3, %k0, %k0
|
2017-03-29 14:55:28 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2016-08-26 06:48:11 +08:00
|
|
|
; SKX-NEXT: andl $1, %eax
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <4 x i32> %a, %b
|
|
|
|
%t2 = extractelement <4 x i1> %t1, i32 3
|
|
|
|
%res = zext i1 %t2 to i8
|
|
|
|
ret i8 %res
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares %a > %b (unsigned, per lane), extracts element 2 of the <32 x i1>
; result and zero-extends it to i8. The KNL prefix expects vpextrb $2; the
; SKX prefix expects a kshiftrd $2 on the mask register.
define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) {
|
|
|
|
; KNL-LABEL: test_extractelement_v32i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2016-08-11 20:13:46 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
|
|
; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpextrb $2, %xmm0, %eax
|
|
|
|
; KNL-NEXT: andl $1, %eax
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: vzeroupper
|
2016-08-11 20:13:46 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_extractelement_v32i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
|
2017-12-15 02:35:25 +08:00
|
|
|
; SKX-NEXT: kshiftrd $2, %k0, %k0
|
2017-03-29 14:55:28 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2016-08-26 06:48:11 +08:00
|
|
|
; SKX-NEXT: andl $1, %eax
|
2017-03-03 17:03:24 +08:00
|
|
|
; SKX-NEXT: vzeroupper
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <32 x i8> %a, %b
|
|
|
|
%t2 = extractelement <32 x i1> %t1, i32 2
|
|
|
|
%res = zext i1 %t2 to i8
|
|
|
|
ret i8 %res
|
|
|
|
}
|
|
|
|
|
|
|
|
; Compares %a > %b (unsigned, per lane), extracts element 63 of the <64 x i1>
; result and returns 3 when it is set, 4 otherwise. The KNL prefix expects the
; bit to be read with vpextrb $15 from the upper 128 bits of the second ymm
; compare; the SKX prefix expects a kshiftrq $63 on the mask register.
define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) {
|
|
|
|
; KNL-LABEL: test_extractelement_v64i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2016-08-11 20:13:46 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm0 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
|
|
; KNL-NEXT: vpxor %ymm0, %ymm3, %ymm2
|
|
|
|
; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; KNL-NEXT: vpextrb $15, %xmm0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; KNL-NEXT: andb $1, %al
|
|
|
|
; KNL-NEXT: movb $4, %cl
|
|
|
|
; KNL-NEXT: subb %al, %cl
|
|
|
|
; KNL-NEXT: movzbl %cl, %eax
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: vzeroupper
|
2016-08-11 20:13:46 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_extractelement_v64i1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kshiftrq $63, %k0, %k0
|
2017-03-29 14:55:28 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; SKX-NEXT: andb $1, %al
|
|
|
|
; SKX-NEXT: movb $4, %cl
|
|
|
|
; SKX-NEXT: subb %al, %cl
|
|
|
|
; SKX-NEXT: movzbl %cl, %eax
|
2017-03-03 17:03:24 +08:00
|
|
|
; SKX-NEXT: vzeroupper
|
2016-08-11 20:13:46 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <64 x i8> %a, %b
|
|
|
|
%t2 = extractelement <64 x i1> %t1, i32 63
|
|
|
|
%res = select i1 %t2, i8 3, i8 4
|
|
|
|
ret i8 %res
|
|
|
|
}
|
2017-02-09 15:39:19 +08:00
|
|
|
|
2017-03-01 02:02:38 +08:00
|
|
|
; Compares %a > %b (unsigned, per lane), extracts element 63 of the <64 x i1>
; result, sign-extends it to i8 and adds 4, so the result is 3 when the bit is
; set and 4 otherwise. Both prefixes expect movb $4 / subb of the masked bit.
define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) {
|
|
|
|
; KNL-LABEL: extractelement_v64i1_alt:
|
2017-12-05 01:18:51 +08:00
|
|
|
; KNL: ## %bb.0:
|
2017-03-01 02:02:38 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm0 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
|
|
; KNL-NEXT: vpxor %ymm0, %ymm3, %ymm2
|
|
|
|
; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
|
|
|
|
; KNL-NEXT: vpextrb $15, %xmm0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; KNL-NEXT: andb $1, %al
|
|
|
|
; KNL-NEXT: movb $4, %cl
|
|
|
|
; KNL-NEXT: subb %al, %cl
|
|
|
|
; KNL-NEXT: movzbl %cl, %eax
|
2017-09-27 22:44:15 +08:00
|
|
|
; KNL-NEXT: vzeroupper
|
2017-03-01 02:02:38 +08:00
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: extractelement_v64i1_alt:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SKX: ## %bb.0:
|
2017-03-01 02:02:38 +08:00
|
|
|
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
|
|
|
|
; SKX-NEXT: kshiftrq $63, %k0, %k0
|
2017-03-29 14:55:28 +08:00
|
|
|
; SKX-NEXT: kmovd %k0, %eax
|
2017-05-19 20:35:15 +08:00
|
|
|
; SKX-NEXT: andb $1, %al
|
|
|
|
; SKX-NEXT: movb $4, %cl
|
|
|
|
; SKX-NEXT: subb %al, %cl
|
|
|
|
; SKX-NEXT: movzbl %cl, %eax
|
2017-03-03 17:03:24 +08:00
|
|
|
; SKX-NEXT: vzeroupper
|
2017-03-01 02:02:38 +08:00
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <64 x i8> %a, %b
|
|
|
|
%t2 = extractelement <64 x i1> %t1, i32 63
|
|
|
|
%sext = sext i1 %t2 to i8
|
|
|
|
%res = add i8 %sext, 4
|
|
|
|
ret i8 %res
|
|
|
|
}
|
|
|
|
|
2017-02-09 15:39:19 +08:00
|
|
|
; Extracts the element of %t1 selected at run time by %index. The checks
; expect the xmm to be stored below %rsp, the index masked with andl $1, and
; the element loaded with an 8-byte-scaled indexed movq.
define i64 @test_extractelement_variable_v2i64(<2 x i64> %t1, i32 %index) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: test_extractelement_variable_v2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: andl $1, %edi
|
|
|
|
; CHECK-NEXT: movq -24(%rsp,%rdi,8), %rax
|
|
|
|
; CHECK-NEXT: retq
|
2017-02-09 15:39:19 +08:00
|
|
|
%t2 = extractelement <2 x i64> %t1, i32 %index
|
|
|
|
ret i64 %t2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Extracts the element of %t1 selected at run time by %index. The checks
; expect a 32-byte-aligned frame (andq $-32), the ymm stored at (%rsp), the
; index masked with andl $3, and an 8-byte-scaled indexed movq.
define i64 @test_extractelement_variable_v4i64(<4 x i64> %t1, i32 %index) {
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-LABEL: test_extractelement_variable_v4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: pushq %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; CHECK-NEXT: movq %rsp, %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; CHECK-NEXT: andq $-32, %rsp
|
|
|
|
; CHECK-NEXT: subq $64, %rsp
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
|
|
|
|
; CHECK-NEXT: andl $3, %edi
|
|
|
|
; CHECK-NEXT: movq (%rsp,%rdi,8), %rax
|
|
|
|
; CHECK-NEXT: movq %rbp, %rsp
|
|
|
|
; CHECK-NEXT: popq %rbp
|
|
|
|
; CHECK-NEXT: vzeroupper
|
|
|
|
; CHECK-NEXT: retq
|
2017-02-09 15:39:19 +08:00
|
|
|
%t2 = extractelement <4 x i64> %t1, i32 %index
|
|
|
|
ret i64 %t2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Extracts the element of %t1 selected at run time by %index. The checks
; expect a 64-byte-aligned frame (andq $-64), the zmm stored at (%rsp), the
; index masked with andl $7, and an 8-byte-scaled indexed movq.
define i64 @test_extractelement_variable_v8i64(<8 x i64> %t1, i32 %index) {
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-LABEL: test_extractelement_variable_v8i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: pushq %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; CHECK-NEXT: movq %rsp, %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; CHECK-NEXT: andq $-64, %rsp
|
|
|
|
; CHECK-NEXT: subq $128, %rsp
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
|
|
|
|
; CHECK-NEXT: andl $7, %edi
|
|
|
|
; CHECK-NEXT: movq (%rsp,%rdi,8), %rax
|
|
|
|
; CHECK-NEXT: movq %rbp, %rsp
|
|
|
|
; CHECK-NEXT: popq %rbp
|
|
|
|
; CHECK-NEXT: vzeroupper
|
|
|
|
; CHECK-NEXT: retq
|
2017-02-09 15:39:19 +08:00
|
|
|
%t2 = extractelement <8 x i64> %t1, i32 %index
|
|
|
|
ret i64 %t2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Extracts the element of %t1 selected at run time by %index. The checks
; expect the xmm to be stored below %rsp, the index masked with andl $1, and
; the element loaded from memory with vmovsd (the address is matched by a
; regex, so it is not pinned here).
define double @test_extractelement_variable_v2f64(<2 x double> %t1, i32 %index) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: test_extractelement_variable_v2f64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: andl $1, %edi
|
|
|
|
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: retq
|
2017-02-09 15:39:19 +08:00
|
|
|
%t2 = extractelement <2 x double> %t1, i32 %index
|
|
|
|
ret double %t2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Extracts the element of %t1 selected at run time by %index. The checks
; expect a 32-byte-aligned frame (andq $-32), the ymm stored at (%rsp), the
; index masked with andl $3, and the element loaded from memory with vmovsd.
define double @test_extractelement_variable_v4f64(<4 x double> %t1, i32 %index) {
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-LABEL: test_extractelement_variable_v4f64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: pushq %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; CHECK-NEXT: movq %rsp, %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; CHECK-NEXT: andq $-32, %rsp
|
|
|
|
; CHECK-NEXT: subq $64, %rsp
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
|
|
|
|
; CHECK-NEXT: andl $3, %edi
|
|
|
|
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: movq %rbp, %rsp
|
|
|
|
; CHECK-NEXT: popq %rbp
|
|
|
|
; CHECK-NEXT: vzeroupper
|
|
|
|
; CHECK-NEXT: retq
|
2017-02-09 15:39:19 +08:00
|
|
|
%t2 = extractelement <4 x double> %t1, i32 %index
|
|
|
|
ret double %t2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Extracts the element of %t1 selected at run time by %index. The checks
; expect a 64-byte-aligned frame (andq $-64), the zmm stored at (%rsp), the
; index masked with andl $7, and the element loaded from memory with vmovsd.
define double @test_extractelement_variable_v8f64(<8 x double> %t1, i32 %index) {
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-LABEL: test_extractelement_variable_v8f64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: pushq %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; CHECK-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; CHECK-NEXT: movq %rsp, %rbp
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; CHECK-NEXT: andq $-64, %rsp
|
|
|
|
; CHECK-NEXT: subq $128, %rsp
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
|
2017-10-10 23:58:54 +08:00
|
|
|
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
|
|
|
|
; CHECK-NEXT: andl $7, %edi
|
|
|
|
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; CHECK-NEXT: movq %rbp, %rsp
|
|
|
|
; CHECK-NEXT: popq %rbp
|
|
|
|
; CHECK-NEXT: vzeroupper
|
|
|
|
; CHECK-NEXT: retq
|
2017-02-09 15:39:19 +08:00
|
|
|
%t2 = extractelement <8 x double> %t1, i32 %index
|
|
|
|
ret double %t2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Extracts the element of %t1 selected at run time by %index. The checks
; expect the xmm to be stored below %rsp, the index masked with andl $3, and
; the element loaded with a 4-byte-scaled indexed movl.
define i32 @test_extractelement_variable_v4i32(<4 x i32> %t1, i32 %index) {
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-LABEL: test_extractelement_variable_v4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; CHECK: ## %bb.0:
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
|
2017-08-01 05:20:06 +08:00
|
|
|
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
|
|
|
; CHECK-NEXT: andl $3, %edi
|
|
|
|
; CHECK-NEXT: movl -24(%rsp,%rdi,4), %eax
|
|
|
|
; CHECK-NEXT: retq
|
2017-02-09 15:39:19 +08:00
|
|
|
%t2 = extractelement <4 x i32> %t1, i32 %index
|
|
|
|
ret i32 %t2
|
|
|
|
}
|
|
|
|
|
|
|
|
; Variable-index extract from <8 x i32>. Expected lowering: spill %ymm0 to a
; 32-byte-aligned stack slot, mask the index with 7, load the selected dword.
define i32 @test_extractelement_variable_v8i32(<8 x i32> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v8i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl (%rsp,%rdi,4), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t2 = extractelement <8 x i32> %t1, i32 %index
  ret i32 %t2
}
|
|
|
|
|
|
|
|
; Variable-index extract from <16 x i32>. Expected lowering: spill %zmm0 to a
; 64-byte-aligned stack slot, mask the index with 15, load the selected dword.
define i32 @test_extractelement_variable_v16i32(<16 x i32> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v16i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movl (%rsp,%rdi,4), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t2 = extractelement <16 x i32> %t1, i32 %index
  ret i32 %t2
}
|
|
|
|
|
|
|
|
; Variable-index extract from <4 x float>. Expected lowering: store %xmm0 below
; %rsp (no frame setup), mask the index with 3, reload one scalar float.
define float @test_extractelement_variable_v4f32(<4 x float> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v4f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $3, %edi
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
  %t2 = extractelement <4 x float> %t1, i32 %index
  ret float %t2
}
|
|
|
|
|
|
|
|
; Variable-index extract from <8 x float>. Expected lowering: spill %ymm0 to a
; 32-byte-aligned stack slot, mask the index with 7, reload one scalar float.
define float @test_extractelement_variable_v8f32(<8 x float> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t2 = extractelement <8 x float> %t1, i32 %index
  ret float %t2
}
|
|
|
|
|
|
|
|
; Variable-index extract from <16 x float>. Expected lowering: spill %zmm0 to a
; 64-byte-aligned stack slot, mask the index with 15, reload one scalar float.
define float @test_extractelement_variable_v16f32(<16 x float> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v16f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t2 = extractelement <16 x float> %t1, i32 %index
  ret float %t2
}
|
|
|
|
|
|
|
|
; Variable-index extract from <8 x i16>. Expected lowering: store %xmm0 below
; %rsp (no frame setup), mask the index with 7, zero-extending word load.
define i16 @test_extractelement_variable_v8i16(<8 x i16> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v8i16:
; CHECK: ## %bb.0:
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movzwl -24(%rsp,%rdi,2), %eax
; CHECK-NEXT: retq
  %t2 = extractelement <8 x i16> %t1, i32 %index
  ret i16 %t2
}
|
|
|
|
|
|
|
|
; Variable-index extract from <16 x i16>. Expected lowering: spill %ymm0 to a
; 32-byte-aligned stack slot, mask the index with 15, zero-extending word load.
define i16 @test_extractelement_variable_v16i16(<16 x i16> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v16i16:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movzwl (%rsp,%rdi,2), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t2 = extractelement <16 x i16> %t1, i32 %index
  ret i16 %t2
}
|
|
|
|
|
2017-02-20 22:16:29 +08:00
|
|
|
; Variable-index extract from <32 x i16>. The KNL run expects the vector to
; arrive as two ymm halves that are spilled separately; the SKX runs expect a
; single zmm spill. Both mask the index with 31 and do a zero-extending word
; load from the 64-byte-aligned stack slot.
define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) {
; KNL-LABEL: test_extractelement_variable_v32i16:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: andl $31, %edi
; KNL-NEXT: movzwl (%rsp,%rdi,2), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v32i16:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
; SKX-NEXT: andq $-64, %rsp
; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: andl $31, %edi
; SKX-NEXT: movzwl (%rsp,%rdi,2), %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t2 = extractelement <32 x i16> %t1, i32 %index
  ret i16 %t2
}
|
2017-02-09 15:39:19 +08:00
|
|
|
|
|
|
|
; Variable-index extract from <16 x i8>. Expected lowering: store %xmm0 below
; %rsp (no frame setup), mask the index with 15, load the selected byte.
define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v16i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movb -24(%rsp,%rdi), %al
; CHECK-NEXT: retq
  %t2 = extractelement <16 x i8> %t1, i32 %index
  ret i8 %t2
}
|
|
|
|
|
|
|
|
; Variable-index extract from <32 x i8>. Expected lowering: spill %ymm0 to a
; 32-byte-aligned stack slot, mask the index with 31, load the selected byte.
define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v32i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $31, %edi
; CHECK-NEXT: movb (%rsp,%rdi), %al
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq

  %t2 = extractelement <32 x i8> %t1, i32 %index
  ret i8 %t2
}
|
|
|
|
|
2017-02-20 22:16:29 +08:00
|
|
|
; Variable-index extract from <64 x i8>. The KNL run expects two ymm halves
; spilled separately; the SKX runs expect a single zmm spill. Both mask the
; index with 63 and load the selected byte from the 64-byte-aligned stack slot.
define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
; KNL-LABEL: test_extractelement_variable_v64i8:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: andl $63, %edi
; KNL-NEXT: movb (%rsp,%rdi), %al
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v64i8:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
; SKX-NEXT: andq $-64, %rsp
; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: andl $63, %edi
; SKX-NEXT: movb (%rsp,%rdi), %al
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq

  %t2 = extractelement <64 x i8> %t1, i32 %index
  ret i8 %t2
}
|
|
|
|
|
|
|
|
; Same as the <64 x i8> variable extract, but the index is an i8 computed as
; %index + %index. The expected code doubles %dil, zero-extends it to 32 bits,
; masks with 63, and loads the byte from the spilled vector.
define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index) {
; KNL-LABEL: test_extractelement_variable_v64i8_indexi8:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: addb %dil, %dil
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: movzbl %dil, %eax
; KNL-NEXT: andl $63, %eax
; KNL-NEXT: movb (%rsp,%rax), %al
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v64i8_indexi8:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
; SKX-NEXT: andq $-64, %rsp
; SKX-NEXT: subq $128, %rsp
; SKX-NEXT: addb %dil, %dil
; SKX-NEXT: vmovaps %zmm0, (%rsp)
; SKX-NEXT: movzbl %dil, %eax
; SKX-NEXT: andl $63, %eax
; SKX-NEXT: movb (%rsp,%rax), %al
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq

  %i = add i8 %index, %index
  %t2 = extractelement <64 x i8> %t1, i8 %i
  ret i8 %t2
}
|
2017-02-21 22:01:25 +08:00
|
|
|
|
|
|
|
; Variable-index extract from a <2 x i1> produced by an unsigned-greater-than
; compare of <2 x i64>. The KNL run expects the unsigned compare to be done by
; flipping the sign bits and using vpcmpgtq; the SKX runs expect a mask compare
; expanded back to a vector with vpmovm2q. Both then spill, index with a mask
; of 1, and keep only bit 0 of the loaded element.
define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v2i1:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v2i1:
; SKX: ## %bb.0:
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: retq
  %t1 = icmp ugt <2 x i64> %a, %b
  %t2 = extractelement <2 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}
|
|
|
|
|
|
|
|
; Variable-index extract from a <4 x i1> produced by an unsigned-greater-than
; compare of <4 x i32>. The KNL run expects sign-bit flipping plus vpcmpgtd;
; the SKX runs expect a mask compare expanded with vpmovm2d. Both then spill,
; mask the index with 3, and keep only bit 0 of the loaded element.
define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v4i1:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $3, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v4i1:
; SKX: ## %bb.0:
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $3, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: retq
  %t1 = icmp ugt <4 x i32> %a, %b
  %t2 = extractelement <4 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}
|
|
|
|
|
|
|
|
; Variable-index extract from a <8 x i1> produced by an unsigned-greater-than
; compare of <8 x i32>. The KNL run expects the compare widened to zmm, with
; the mask expanded via vpternlogd and narrowed to words with vpmovdw; the SKX
; runs expect a ymm mask compare expanded with vpmovm2w. Both then spill the
; xmm result, mask the index with 7, and keep only bit 0 of the loaded byte.
define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v8i1:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $7, %edi
; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v8i1:
; SKX: ## %bb.0:
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleud %ymm1, %ymm0, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $7, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <8 x i32> %a, %b
  %t2 = extractelement <8 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}
|
|
|
|
|
|
|
|
; Variable-index extract from a <16 x i1> produced by an unsigned-greater-than
; compare of <16 x i32>. The KNL run expects the mask expanded via vpternlogd
; and narrowed to bytes with vpmovdb; the SKX runs expect vpmovm2b. Both then
; spill the xmm result, mask the index with 15, and keep only bit 0 of the
; loaded byte.
define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v16i1:
; KNL: ## %bb.0:
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $15, %edi
; KNL-NEXT: movzbl -24(%rsp,%rdi), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v16i1:
; SKX: ## %bb.0:
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $15, %edi
; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <16 x i32> %a, %b
  %t2 = extractelement <16 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}
|
|
|
|
|
|
|
|
; Variable-index extract from a <32 x i1> produced by an unsigned-greater-than
; compare of <32 x i8>. The KNL run expects sign-bit flipping plus vpcmpgtb on
; ymm; the SKX runs expect a mask compare expanded with vpmovm2b. Both spill
; the ymm result to a 32-byte-aligned stack slot, mask the index with 31, and
; keep only bit 0 of the loaded byte.
define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v32i1:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1
; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vmovdqa %ymm0, (%rsp)
; KNL-NEXT: andl $31, %edi
; KNL-NEXT: movzbl (%rsp,%rdi), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_varible_v32i1:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
; SKX-NEXT: andq $-32, %rsp
; SKX-NEXT: subq $64, %rsp
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT: vpmovm2b %k0, %ymm0
; SKX-NEXT: vmovdqa %ymm0, (%rsp)
; SKX-NEXT: andl $31, %edi
; SKX-NEXT: movzbl (%rsp,%rdi), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <32 x i8> %a, %b
  %t2 = extractelement <32 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}
|
|
|
|
|
2017-09-29 00:53:16 +08:00
|
|
|
; Builds an <8 x i64> through three shuffles of %a with zero/undef vectors.
; The expected code is a single zeroing vxorps followed by one vinsertf32x4
; that places %xmm0 at 128-bit lane 2 of the zeroed register.
define <8 x i64> @insert_double_zero(<2 x i64> %a) nounwind {
; CHECK-LABEL: insert_double_zero:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vinsertf32x4 $2, %xmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %b = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %d = shufflevector <4 x i64> %b, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %e = shufflevector <8 x i64> %d, <8 x i64> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i64> %e
}
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL due to the type not being byte addressable as required by the legalizing through memory accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
|
|
|
|
; Variable-index insert into a <32 x i1> (from `icmp ugt <32 x i8> %a, 0`) of
; the bit `%b > 0`, with the result bitcast to i32. Both configurations expect
; the mask to be expanded to a byte vector on the stack, the new element
; written with a setne at the index masked with 31, and the vector converted
; back to mask bits. The KNL run rebuilds the 32 bits as two 16-bit kmovw
; stores that are reloaded with one movl; the SKX runs use vpsllw/vpmovb2m and
; a kmovd.
define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
; KNL-LABEL: test_insertelement_variable_v32i1:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $96, %rsp
; KNL-NEXT: ## kill: def %esi killed %esi def %rsi
; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; KNL-NEXT: vpxor %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; KNL-NEXT: andl $31, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: setne 32(%rsp,%rsi)
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: movl (%rsp), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_insertelement_variable_v32i1:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbp, -16
; SKX-NEXT: movq %rsp, %rbp
; SKX-NEXT: .cfi_def_cfa_register %rbp
; SKX-NEXT: andq $-32, %rsp
; SKX-NEXT: subq $64, %rsp
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT: andl $31, %esi
; SKX-NEXT: testb %dil, %dil
; SKX-NEXT: vpmovm2b %k0, %ymm0
; SKX-NEXT: vmovdqa %ymm0, (%rsp)
; SKX-NEXT: setne (%rsp,%rsi)
; SKX-NEXT: vpsllw $7, (%rsp), %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: movq %rbp, %rsp
; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %t1 = icmp ugt <32 x i8> %a, zeroinitializer
  %t2 = icmp ugt i8 %b, 0
  %t3 = insertelement <32 x i1> %t1, i1 %t2, i32 %index
  %t4 = bitcast <32 x i1> %t3 to i32
  ret i32 %t4
}
|
|
|
|
|
|
|
|
define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
|
|
|
|
; KNL-LABEL: test_insertelement_variable_v64i1:
|
|
|
|
; KNL: ## %bb.0:
|
|
|
|
; KNL-NEXT: pushq %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; KNL-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; KNL-NEXT: movq %rsp, %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; KNL-NEXT: andq $-64, %rsp
|
|
|
|
; KNL-NEXT: subq $192, %rsp
|
|
|
|
; KNL-NEXT: ## kill: def %esi killed %esi def %rsi
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
|
|
; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: andl $63, %esi
|
|
|
|
; KNL-NEXT: testb %dil, %dil
|
|
|
|
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
|
2017-12-23 01:18:13 +08:00
|
|
|
; KNL-NEXT: setne 64(%rsp,%rsi)
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
|
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
|
|
|
|
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, (%rsp)
|
|
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
|
|
; KNL-NEXT: movl (%rsp), %eax
|
|
|
|
; KNL-NEXT: shlq $32, %rax
|
|
|
|
; KNL-NEXT: orq %rcx, %rax
|
|
|
|
; KNL-NEXT: movq %rbp, %rsp
|
|
|
|
; KNL-NEXT: popq %rbp
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_insertelement_variable_v64i1:
|
|
|
|
; SKX: ## %bb.0:
|
|
|
|
; SKX-NEXT: pushq %rbp
|
|
|
|
; SKX-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; SKX-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; SKX-NEXT: movq %rsp, %rbp
|
|
|
|
; SKX-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; SKX-NEXT: andq $-64, %rsp
|
|
|
|
; SKX-NEXT: subq $128, %rsp
|
|
|
|
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
|
|
|
|
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2017-12-22 06:08:23 +08:00
|
|
|
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: andl $63, %esi
|
|
|
|
; SKX-NEXT: testb %dil, %dil
|
2017-12-22 06:08:23 +08:00
|
|
|
; SKX-NEXT: vpmovm2b %k0, %zmm0
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
|
2017-12-23 01:18:13 +08:00
|
|
|
; SKX-NEXT: setne (%rsp,%rsi)
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
|
|
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: kmovq %k0, %rax
|
|
|
|
; SKX-NEXT: movq %rbp, %rsp
|
|
|
|
; SKX-NEXT: popq %rbp
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <64 x i8> %a, zeroinitializer
|
|
|
|
%t2 = icmp ugt i8 %b, 0
|
|
|
|
%t3 = insertelement <64 x i1> %t1, i1 %t2, i32 %index
|
|
|
|
%t4 = bitcast <64 x i1> %t3 to i64
|
|
|
|
ret i64 %t4
|
|
|
|
}
|
|
|
|
|
|
|
|
define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
|
|
|
|
; KNL-LABEL: test_insertelement_variable_v96i1:
|
|
|
|
; KNL: ## %bb.0:
|
|
|
|
; KNL-NEXT: pushq %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; KNL-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; KNL-NEXT: movq %rsp, %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; KNL-NEXT: andq $-128, %rsp
|
|
|
|
; KNL-NEXT: subq $384, %rsp ## imm = 0x180
|
|
|
|
; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm0, %xmm0
|
|
|
|
; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm1, %xmm1
|
|
|
|
; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vmovd %edi, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2
|
|
|
|
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
|
|
; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
|
|
; KNL-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2
|
|
|
|
; KNL-NEXT: vpxor %ymm3, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpxor %ymm3, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm3, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
|
|
|
; KNL-NEXT: movl 744(%rbp), %eax
|
|
|
|
; KNL-NEXT: andl $127, %eax
|
|
|
|
; KNL-NEXT: cmpb $0, 736(%rbp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
|
2017-12-23 01:18:13 +08:00
|
|
|
; KNL-NEXT: setne 128(%rsp,%rax)
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
|
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
|
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
|
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm4, %zmm4
|
|
|
|
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm2, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm3, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, (%rsp)
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
|
|
; KNL-NEXT: shlq $32, %rax
|
|
|
|
; KNL-NEXT: orq %rcx, %rax
|
|
|
|
; KNL-NEXT: movl (%rsp), %ecx
|
|
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx
|
|
|
|
; KNL-NEXT: shlq $32, %rdx
|
|
|
|
; KNL-NEXT: orq %rcx, %rdx
|
|
|
|
; KNL-NEXT: movq %rbp, %rsp
|
|
|
|
; KNL-NEXT: popq %rbp
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_insertelement_variable_v96i1:
|
|
|
|
; SKX: ## %bb.0:
|
|
|
|
; SKX-NEXT: pushq %rbp
|
|
|
|
; SKX-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; SKX-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; SKX-NEXT: movq %rsp, %rbp
|
|
|
|
; SKX-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; SKX-NEXT: andq $-128, %rsp
|
|
|
|
; SKX-NEXT: subq $256, %rsp ## imm = 0x100
|
|
|
|
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
|
|
; SKX-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $3, 248(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0
|
|
|
|
; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; SKX-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; SKX-NEXT: vmovd %edi, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $4, %r8d, %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $5, %r9d, %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $6, 16(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $7, 24(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $8, 32(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $9, 40(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $10, 48(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $11, 56(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $12, 64(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $13, 72(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $14, 80(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $15, 88(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; SKX-NEXT: vpinsrb $1, 104(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $2, 112(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $3, 120(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $4, 128(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $5, 136(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $6, 144(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $7, 152(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $8, 160(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $9, 168(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $10, 176(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $11, 184(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $12, 192(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $13, 200(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $14, 208(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $15, 216(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
|
|
; SKX-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
|
|
|
; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; SKX-NEXT: vpinsrb $1, 488(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $2, 496(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $3, 504(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $4, 512(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $5, 520(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $6, 528(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $7, 536(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $8, 544(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $9, 552(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $10, 560(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $11, 568(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $12, 576(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $13, 584(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $14, 592(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vpinsrb $15, 600(%rbp), %xmm1, %xmm1
|
|
|
|
; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; SKX-NEXT: vpinsrb $1, 616(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $2, 624(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $3, 632(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $4, 640(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $5, 648(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $6, 656(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $7, 664(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $8, 672(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $9, 680(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $10, 688(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $11, 696(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $12, 704(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $13, 712(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $14, 720(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vpinsrb $15, 728(%rbp), %xmm2, %xmm2
|
|
|
|
; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
|
|
|
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
2017-12-22 10:30:26 +08:00
|
|
|
; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
|
|
|
|
; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: movl 744(%rbp), %eax
|
|
|
|
; SKX-NEXT: andl $127, %eax
|
|
|
|
; SKX-NEXT: cmpb $0, 736(%rbp)
|
2017-12-22 10:30:26 +08:00
|
|
|
; SKX-NEXT: vpmovm2b %k1, %zmm0
|
|
|
|
; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; SKX-NEXT: vpmovm2b %k0, %zmm0
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
|
2017-12-23 01:18:13 +08:00
|
|
|
; SKX-NEXT: setne (%rsp,%rax)
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
|
|
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
|
|
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k1
|
|
|
|
; SKX-NEXT: kmovq %k1, %rax
|
|
|
|
; SKX-NEXT: kmovq %k0, %rdx
|
|
|
|
; SKX-NEXT: movq %rbp, %rsp
|
|
|
|
; SKX-NEXT: popq %rbp
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <96 x i8> %a, zeroinitializer
|
|
|
|
%t2 = icmp ugt i8 %b, 0
|
|
|
|
%t3 = insertelement <96 x i1> %t1, i1 %t2, i32 %index
|
|
|
|
%t4 = bitcast <96 x i1> %t3 to i96
|
|
|
|
ret i96 %t4
|
|
|
|
}
|
|
|
|
|
|
|
|
define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index) {
|
|
|
|
; KNL-LABEL: test_insertelement_variable_v128i1:
|
|
|
|
; KNL: ## %bb.0:
|
|
|
|
; KNL-NEXT: pushq %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; KNL-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; KNL-NEXT: movq %rsp, %rbp
|
|
|
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; KNL-NEXT: andq $-128, %rsp
|
|
|
|
; KNL-NEXT: subq $384, %rsp ## imm = 0x180
|
|
|
|
; KNL-NEXT: ## kill: def %esi killed %esi def %rsi
|
|
|
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
|
|
|
; KNL-NEXT: vpxor %ymm4, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0
|
|
|
|
; KNL-NEXT: vpxor %ymm4, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm4, %ymm1, %ymm1
|
|
|
|
; KNL-NEXT: vpxor %ymm4, %ymm2, %ymm2
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm4, %ymm2, %ymm2
|
|
|
|
; KNL-NEXT: vpxor %ymm4, %ymm3, %ymm3
|
|
|
|
; KNL-NEXT: vpcmpgtb %ymm4, %ymm3, %ymm3
|
|
|
|
; KNL-NEXT: andl $127, %esi
|
|
|
|
; KNL-NEXT: testb %dil, %dil
|
|
|
|
; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
|
2017-12-23 01:18:13 +08:00
|
|
|
; KNL-NEXT: setne 128(%rsp,%rsi)
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
|
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
|
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
|
|
|
|
; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm4, %zmm4
|
|
|
|
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm2, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm3, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, (%rsp)
|
|
|
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
|
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
|
|
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
|
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
|
|
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
|
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx
|
|
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
|
|
|
; KNL-NEXT: shlq $32, %rax
|
|
|
|
; KNL-NEXT: orq %rcx, %rax
|
|
|
|
; KNL-NEXT: movl (%rsp), %ecx
|
|
|
|
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx
|
|
|
|
; KNL-NEXT: shlq $32, %rdx
|
|
|
|
; KNL-NEXT: orq %rcx, %rdx
|
|
|
|
; KNL-NEXT: movq %rbp, %rsp
|
|
|
|
; KNL-NEXT: popq %rbp
|
|
|
|
; KNL-NEXT: vzeroupper
|
|
|
|
; KNL-NEXT: retq
|
|
|
|
;
|
|
|
|
; SKX-LABEL: test_insertelement_variable_v128i1:
|
|
|
|
; SKX: ## %bb.0:
|
|
|
|
; SKX-NEXT: pushq %rbp
|
|
|
|
; SKX-NEXT: .cfi_def_cfa_offset 16
|
|
|
|
; SKX-NEXT: .cfi_offset %rbp, -16
|
|
|
|
; SKX-NEXT: movq %rsp, %rbp
|
|
|
|
; SKX-NEXT: .cfi_def_cfa_register %rbp
|
|
|
|
; SKX-NEXT: andq $-128, %rsp
|
|
|
|
; SKX-NEXT: subq $256, %rsp ## imm = 0x100
|
|
|
|
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
|
|
|
|
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
2017-12-22 10:30:26 +08:00
|
|
|
; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
|
|
|
|
; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: andl $127, %esi
|
|
|
|
; SKX-NEXT: testb %dil, %dil
|
2017-12-22 10:30:26 +08:00
|
|
|
; SKX-NEXT: vpmovm2b %k1, %zmm0
|
|
|
|
; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
|
|
|
|
; SKX-NEXT: vpmovm2b %k0, %zmm0
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
|
2017-12-23 01:18:13 +08:00
|
|
|
; SKX-NEXT: setne (%rsp,%rsi)
|
[SelectionDAG][X86] Fix insert_vector_elt lowering for v32i1/v64i1 with non-constant index
Summary:
Currently we don't handle v32i1/v64i1 insert_vector_elt correctly as we fail to look at the number of elements closely and assume it can only be v16i1 or v8i1.
We also can't type legalize v64i1 insert_vector_elt correctly on KNL because the type is not byte addressable, which the legalize-through-memory-accesses path requires.
For the first issue, the patch now tries to pick a 512-bit register with the correct number of elements and promotes to that.
For the second issue, we now extend the vector to a byte addressable type, do the stores to memory, load the two halves, and then truncate the halves back to the original type. Technically since we changed the type, we may not need two loads, but actually checking that is more work and for the v64i1 case we do need them.
Reviewers: RKSimon, delena, spatel, zvi
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D40942
llvm-svn: 320849
2017-12-16 03:35:22 +08:00
|
|
|
; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
|
|
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k0
|
|
|
|
; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
|
|
|
|
; SKX-NEXT: vpmovb2m %zmm0, %k1
|
|
|
|
; SKX-NEXT: kmovq %k1, %rax
|
|
|
|
; SKX-NEXT: kmovq %k0, %rdx
|
|
|
|
; SKX-NEXT: movq %rbp, %rsp
|
|
|
|
; SKX-NEXT: popq %rbp
|
|
|
|
; SKX-NEXT: vzeroupper
|
|
|
|
; SKX-NEXT: retq
|
|
|
|
%t1 = icmp ugt <128 x i8> %a, zeroinitializer
|
|
|
|
%t2 = icmp ugt i8 %b, 0
|
|
|
|
%t3 = insertelement <128 x i1> %t1, i1 %t2, i32 %index
|
|
|
|
%t4 = bitcast <128 x i1> %t3 to i128
|
|
|
|
ret i128 %t4
|
|
|
|
}
|