; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx-builtins.c

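; Each test below mirrors a single AVX intrinsic: the IR body matches what
; clang emits for the intrinsic, and the autogenerated assertions check the
; fast-isel lowering for both the 32-bit (X32) and 64-bit (X64) targets.
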
define <4 x double> @test_mm256_add_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_add_pd:
; X32:       # BB#0:
; X32-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_add_pd:
; X64:       # BB#0:
; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = fadd <4 x double> %a0, %a1
  ret <4 x double> %res
}

define <8 x float> @test_mm256_add_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_add_ps:
; X32:       # BB#0:
; X32-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_add_ps:
; X64:       # BB#0:
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = fadd <8 x float> %a0, %a1
  ret <8 x float> %res
}

define <4 x double> @test_mm256_addsub_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_addsub_pd:
; X32:       # BB#0:
; X32-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_addsub_pd:
; X64:       # BB#0:
; X64-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_mm256_addsub_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_addsub_ps:
; X32:       # BB#0:
; X32-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_addsub_ps:
; X64:       # BB#0:
; X64-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_mm256_and_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_and_pd:
; X32:       # BB#0:
; X32-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_and_pd:
; X64:       # BB#0:
; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = bitcast <4 x double> %a0 to <4 x i64>
  %2 = bitcast <4 x double> %a1 to <4 x i64>
  %res = and <4 x i64> %1, %2
  %bc = bitcast <4 x i64> %res to <4 x double>
  ret <4 x double> %bc
}

define <8 x float> @test_mm256_and_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_and_ps:
; X32:       # BB#0:
; X32-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_and_ps:
; X64:       # BB#0:
; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = bitcast <8 x float> %a0 to <8 x i32>
  %2 = bitcast <8 x float> %a1 to <8 x i32>
  %res = and <8 x i32> %1, %2
  %bc = bitcast <8 x i32> %res to <8 x float>
  ret <8 x float> %bc
}

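; The <4 x i64> all-ones constant below is rematerialized as vxorps+vcmptrueps
; rather than vpcmpeqd+vinsertf128; per PR28129 (D32416) the ymm sequence
; measured ~9% faster on Jaguar and ~3% faster on Sandy Bridge.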
define <4 x double> @test_mm256_andnot_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_andnot_pd:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; X32-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
; X32-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; X32-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_andnot_pd:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; X64-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
; X64-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = bitcast <4 x double> %a0 to <4 x i64>
  %2 = bitcast <4 x double> %a1 to <4 x i64>
  %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
  %res = and <4 x i64> %3, %2
  %bc = bitcast <4 x i64> %res to <4 x double>
  ret <4 x double> %bc
}

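; With <8 x i32> element operations, the same not-then-and pattern is instead
; matched directly to a single vandnps.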
define <8 x float> @test_mm256_andnot_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_andnot_ps:
; X32:       # BB#0:
; X32-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_andnot_ps:
; X64:       # BB#0:
; X64-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = bitcast <8 x float> %a0 to <8 x i32>
  %2 = bitcast <8 x float> %a1 to <8 x i32>
  %3 = xor <8 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <8 x i32> %3, %2
  %bc = bitcast <8 x i32> %res to <8 x float>
  ret <8 x float> %bc
}

define <4 x double> @test_mm256_blend_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_blend_pd:
; X32:       # BB#0:
; X32-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_blend_pd:
; X64:       # BB#0:
; X64-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; X64-NEXT:    retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_blend_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_blend_ps:
; X32:       # BB#0:
; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4,5,6],ymm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_blend_ps:
; X64:       # BB#0:
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4,5,6],ymm1[7]
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_blendv_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) nounwind {
; X32-LABEL: test_mm256_blendv_pd:
; X32:       # BB#0:
; X32-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_blendv_pd:
; X64:       # BB#0:
; X64-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_mm256_blendv_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind {
; X32-LABEL: test_mm256_blendv_ps:
; X32:       # BB#0:
; X32-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_blendv_ps:
; X64:       # BB#0:
; X64-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_mm256_broadcast_pd(<2 x double>* %a0) nounwind {
; X32-LABEL: test_mm256_broadcast_pd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcast_pd:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %ld = load <2 x double>, <2 x double>* %a0
  %res = shufflevector <2 x double> %ld, <2 x double> %ld, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_broadcast_ps(<4 x float>* %a0) nounwind {
; X32-LABEL: test_mm256_broadcast_ps:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcast_ps:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT:    retq
  %ld = load <4 x float>, <4 x float>* %a0
  %res = shufflevector <4 x float> %ld, <4 x float> %ld, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_broadcast_sd(double* %a0) nounwind {
; X32-LABEL: test_mm256_broadcast_sd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcast_sd:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
  %ld = load double, double* %a0
  %ins0 = insertelement <4 x double> undef, double %ld, i32 0
  %ins1 = insertelement <4 x double> %ins0, double %ld, i32 1
  %ins2 = insertelement <4 x double> %ins1, double %ld, i32 2
  %ins3 = insertelement <4 x double> %ins2, double %ld, i32 3
  ret <4 x double> %ins3
}

define <4 x float> @test_mm_broadcast_ss(float* %a0) nounwind {
; X32-LABEL: test_mm_broadcast_ss:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_broadcast_ss:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
  %ld = load float, float* %a0
  %ins0 = insertelement <4 x float> undef, float %ld, i32 0
  %ins1 = insertelement <4 x float> %ins0, float %ld, i32 1
  %ins2 = insertelement <4 x float> %ins1, float %ld, i32 2
  %ins3 = insertelement <4 x float> %ins2, float %ld, i32 3
  ret <4 x float> %ins3
}

define <8 x float> @test_mm256_broadcast_ss(float* %a0) nounwind {
; X32-LABEL: test_mm256_broadcast_ss:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcast_ss:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
  %ld = load float, float* %a0
  %ins0 = insertelement <8 x float> undef, float %ld, i32 0
  %ins1 = insertelement <8 x float> %ins0, float %ld, i32 1
  %ins2 = insertelement <8 x float> %ins1, float %ld, i32 2
  %ins3 = insertelement <8 x float> %ins2, float %ld, i32 3
  %ins4 = insertelement <8 x float> %ins3, float %ld, i32 4
  %ins5 = insertelement <8 x float> %ins4, float %ld, i32 5
  %ins6 = insertelement <8 x float> %ins5, float %ld, i32 6
  %ins7 = insertelement <8 x float> %ins6, float %ld, i32 7
  ret <8 x float> %ins7
}

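; The cast intrinsics are no-ops at the instruction level: bitcasts between
; 256-bit types compile to a bare return, and the 128<->256-bit casts leave
; only register-allocation kill comments, plus vzeroupper when narrowing.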
define <8 x float> @test_mm256_castpd_ps(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_castpd_ps:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castpd_ps:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x double> %a0 to <8 x float>
  ret <8 x float> %res
}

define <4 x i64> @test_mm256_castpd_si256(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_castpd_si256:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castpd_si256:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x double> %a0 to <4 x i64>
  ret <4 x i64> %res
}

define <4 x double> @test_mm256_castpd128_pd256(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm256_castpd128_pd256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castpd128_pd256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x double> %res
}

define <2 x double> @test_mm256_castpd256_pd128(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_castpd256_pd128:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castpd256_pd128:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a0, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %res
}

define <4 x double> @test_mm256_castps_pd(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_castps_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castps_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <8 x float> %a0 to <4 x double>
  ret <4 x double> %res
}

define <4 x i64> @test_mm256_castps_si256(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_castps_si256:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castps_si256:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <8 x float> %a0 to <4 x i64>
  ret <4 x i64> %res
}

define <8 x float> @test_mm256_castps128_ps256(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm256_castps128_ps256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castps128_ps256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x float> %res
}

define <4 x float> @test_mm256_castps256_ps128(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_castps256_ps128:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castps256_ps128:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x i64> @test_mm256_castsi128_si256(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_castsi128_si256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castsi128_si256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i64> %res
}

define <4 x double> @test_mm256_castsi256_pd(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_castsi256_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castsi256_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x i64> %a0 to <4 x double>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_castsi256_ps(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_castsi256_ps:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castsi256_ps:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x i64> %a0 to <8 x float>
  ret <8 x float> %res
}

define <2 x i64> @test_mm256_castsi256_si128(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_castsi256_si128:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_castsi256_si128:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %res
}

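; Ceiling lowers to vroundpd/vroundps with rounding-control immediate 2
; (round toward +infinity); the floor tests further down use immediate 1.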
define <4 x double> @test_mm256_ceil_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_ceil_pd:
; X32:       # BB#0:
; X32-NEXT:    vroundpd $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_ceil_pd:
; X64:       # BB#0:
; X64-NEXT:    vroundpd $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone

define <8 x float> @test_mm256_ceil_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_ceil_ps:
; X32:       # BB#0:
; X32-NEXT:    vroundps $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_ceil_ps:
; X64:       # BB#0:
; X64-NEXT:    vroundps $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone

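; All of the compare tests use predicate 13 (_CMP_GE_OS), which disassembles
; as the vcmpge* forms.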
define <2 x double> @test_mm_cmp_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmp_pd:
; X32:       # BB#0:
; X32-NEXT:    vcmpgepd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmp_pd:
; X64:       # BB#0:
; X64-NEXT:    vcmpgepd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 13)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x double> @test_mm256_cmp_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_cmp_pd:
; X32:       # BB#0:
; X32-NEXT:    vcmpgepd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cmp_pd:
; X64:       # BB#0:
; X64-NEXT:    vcmpgepd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 13)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <4 x float> @test_mm_cmp_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_cmp_ps:
; X32:       # BB#0:
; X32-NEXT:    vcmpgeps %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmp_ps:
; X64:       # BB#0:
; X64-NEXT:    vcmpgeps %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 13)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define <8 x float> @test_mm256_cmp_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_cmp_ps:
; X32:       # BB#0:
; X32-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cmp_ps:
; X64:       # BB#0:
; X64-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 13)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <2 x double> @test_mm_cmp_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmp_sd:
; X32:       # BB#0:
; X32-NEXT:    vcmpgesd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmp_sd:
; X64:       # BB#0:
; X64-NEXT:    vcmpgesd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 13)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @test_mm_cmp_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_cmp_ss:
; X32:       # BB#0:
; X32-NEXT:    vcmpgess %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmp_ss:
; X64:       # BB#0:
; X64-NEXT:    vcmpgess %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 13)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

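; In the conversion tests below, results that narrow a ymm source to an xmm
; result are followed by vzeroupper before returning.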
define <4 x double> @test_mm256_cvtepi32_pd(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_cvtepi32_pd:
; X32:       # BB#0:
; X32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvtepi32_pd:
; X64:       # BB#0:
; X64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = sitofp <4 x i32> %arg0 to <4 x double>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_cvtepi32_ps(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_cvtepi32_ps:
; X32:       # BB#0:
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvtepi32_ps:
; X64:       # BB#0:
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %arg0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone

define <2 x i64> @test_mm256_cvtpd_epi32(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_cvtpd_epi32:
; X32:       # BB#0:
; X32-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvtpd_epi32:
; X64:       # BB#0:
; X64-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %cvt = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  %res = bitcast <4 x i32> %cvt to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone

define <4 x float> @test_mm256_cvtpd_ps(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_cvtpd_ps:
; X32:       # BB#0:
; X32-NEXT:    vcvtpd2ps %ymm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvtpd_ps:
; X64:       # BB#0:
; X64-NEXT:    vcvtpd2ps %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone

define <4 x i64> @test_mm256_cvtps_epi32(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_cvtps_epi32:
; X32:       # BB#0:
; X32-NEXT:    vcvtps2dq %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvtps_epi32:
; X64:       # BB#0:
; X64-NEXT:    vcvtps2dq %ymm0, %ymm0
; X64-NEXT:    retq
  %cvt = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
  %res = bitcast <8 x i32> %cvt to <4 x i64>
  ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone

define <4 x double> @test_mm256_cvtps_pd(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm256_cvtps_pd:
; X32:       # BB#0:
; X32-NEXT:    vcvtps2pd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvtps_pd:
; X64:       # BB#0:
; X64-NEXT:    vcvtps2pd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = fpext <4 x float> %a0 to <4 x double>
  ret <4 x double> %res
}

define <2 x i64> @test_mm256_cvttpd_epi32(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_cvttpd_epi32:
; X32:       # BB#0:
; X32-NEXT:    vcvttpd2dq %ymm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvttpd_epi32:
; X64:       # BB#0:
; X64-NEXT:    vcvttpd2dq %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %cvt = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0)
  %res = bitcast <4 x i32> %cvt to <2 x i64>
  ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone

define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_cvttps_epi32:
; X32:       # BB#0:
; X32-NEXT:    vcvttps2dq %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cvttps_epi32:
; X64:       # BB#0:
; X64-NEXT:    vcvttps2dq %ymm0, %ymm0
; X64-NEXT:    retq
  %cvt = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0)
  %res = bitcast <8 x i32> %cvt to <4 x i64>
  ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone

define <4 x double> @test_mm256_div_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_div_pd:
; X32:       # BB#0:
; X32-NEXT:    vdivpd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_div_pd:
; X64:       # BB#0:
; X64-NEXT:    vdivpd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = fdiv <4 x double> %a0, %a1
  ret <4 x double> %res
}

define <8 x float> @test_mm256_div_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_div_ps:
; X32:       # BB#0:
; X32-NEXT:    vdivps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_div_ps:
; X64:       # BB#0:
; X64-NEXT:    vdivps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = fdiv <8 x float> %a0, %a1
  ret <8 x float> %res
}

define <8 x float> @test_mm256_dp_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_dp_ps:
; X32:       # BB#0:
; X32-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_dp_ps:
; X64:       # BB#0:
; X64-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

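; Element extracts from the upper 128-bit lane go through vextractf128 first;
; byte and word results are then zero-extended from the vpextrb/vpextrw result.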
define i32 @test_mm256_extract_epi8(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extract_epi8:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X32-NEXT:    vpextrb $15, %xmm0, %eax
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_extract_epi8:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vpextrb $15, %xmm0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %ext = extractelement <32 x i8> %arg0, i32 31
  %res = zext i8 %ext to i32
  ret i32 %res
}

define i32 @test_mm256_extract_epi16(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extract_epi16:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X32-NEXT:    vpextrw $3, %xmm0, %eax
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_extract_epi16:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vpextrw $3, %xmm0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %ext = extractelement <16 x i16> %arg0, i32 11
  %res = zext i16 %ext to i32
  ret i32 %res
}

define i32 @test_mm256_extract_epi32(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extract_epi32:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X32-NEXT:    vextractps $1, %xmm0, %eax
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_extract_epi32:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vextractps $1, %xmm0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %res = extractelement <8 x i32> %arg0, i32 5
  ret i32 %res
}

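; X32 has no 64-bit GPRs, so the i64 extract is split into two 32-bit
; vextractps into %eax/%edx; X64 can use a single vpextrq.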
define i64 @test_mm256_extract_epi64(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extract_epi64:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X32-NEXT:    vextractps $2, %xmm0, %eax
; X32-NEXT:    vextractps $3, %xmm0, %edx
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_extract_epi64:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vpextrq $1, %xmm0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = extractelement <4 x i64> %a0, i32 3
  ret i64 %res
}

define <2 x double> @test_mm256_extractf128_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_extractf128_pd:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_extractf128_pd:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a0, <2 x i32> <i32 2, i32 3>
  ret <2 x double> %res
}

define <4 x float> @test_mm256_extractf128_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_extractf128_ps:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_extractf128_ps:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a0, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x float> %res
}

define <2 x i64> @test_mm256_extractf128_si256(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extractf128_si256:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_extractf128_si256:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32> <i32 2, i32 3>
  ret <2 x i64> %res
}

define <4 x double> @test_mm256_floor_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_floor_pd:
; X32:       # BB#0:
; X32-NEXT:    vroundpd $1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_floor_pd:
; X64:       # BB#0:
; X64-NEXT:    vroundpd $1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 1)
  ret <4 x double> %res
}

define <8 x float> @test_mm256_floor_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_floor_ps:
; X32:       # BB#0:
; X32-NEXT:    vroundps $1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_floor_ps:
; X64:       # BB#0:
; X64-NEXT:    vroundps $1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 1)
  ret <8 x float> %res
}

define <4 x double> @test_mm256_hadd_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_hadd_pd:
; X32:       # BB#0:
; X32-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_hadd_pd:
; X64:       # BB#0:
; X64-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_mm256_hadd_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_hadd_ps:
; X32:       # BB#0:
; X32-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_hadd_ps:
; X64:       # BB#0:
; X64-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_mm256_hsub_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_hsub_pd:
; X32:       # BB#0:
; X32-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_hsub_pd:
; X64:       # BB#0:
; X64-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_mm256_hsub_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_hsub_ps:
; X32:       # BB#0:
; X32-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_hsub_ps:
; X64:       # BB#0:
; X64-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <4 x i64> @test_mm256_insert_epi8(<4 x i64> %a0, i8 %a1) nounwind {
; X32-LABEL: test_mm256_insert_epi8:
; X32:       # BB#0:
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm1
; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_insert_epi8:
; X64:       # BB#0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm1
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %res = insertelement <32 x i8> %arg0, i8 %a1, i32 4
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_insert_epi16(<4 x i64> %a0, i16 %a1) nounwind {
; X32-LABEL: test_mm256_insert_epi16:
; X32:       # BB#0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_insert_epi16:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vpinsrw $6, %edi, %xmm1, %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %res = insertelement <16 x i16> %arg0, i16 %a1, i32 14
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x i64> @test_mm256_insert_epi32(<4 x i64> %a0, i32 %a1) nounwind {
; X32-LABEL: test_mm256_insert_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm1
; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_insert_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm1
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %res = insertelement <8 x i32> %arg0, i32 %a1, i32 3
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

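; Likewise, X32 splits the 64-bit element insert into two vpinsrd, while X64
; uses a single vpinsrq.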
define <4 x i64> @test_mm256_insert_epi64(<4 x i64> %a0, i64 %a1) nounwind {
; X32-LABEL: test_mm256_insert_epi64:
; X32:       # BB#0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_insert_epi64:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = insertelement <4 x i64> %a0, i64 %a1, i32 3
  ret <4 x i64> %res
}

define <4 x double> @test_mm256_insertf128_pd(<4 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm256_insertf128_pd:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_insertf128_pd:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X64-NEXT:    retq
  %ext = shufflevector <2 x double> %a1, <2 x double> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x double> %a0, <4 x double> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_insertf128_ps(<8 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm256_insertf128_ps:
; X32:       # BB#0:
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_insertf128_ps:
; X64:       # BB#0:
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %ext = shufflevector <4 x float> %a1, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x float> %a0, <8 x float> %ext, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %res
}

define <4 x i64> @test_mm256_insertf128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_insertf128_si256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_insertf128_si256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X64-NEXT:    retq
  %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_lddqu_si256(<4 x i64>* %a0) nounwind {
; X32-LABEL: test_mm256_lddqu_si256:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vlddqu (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_lddqu_si256:
; X64:       # BB#0:
; X64-NEXT:    vlddqu (%rdi), %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64>* %a0 to i8*
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %arg0)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readnone

define <4 x double> @test_mm256_load_pd(double* %a0) nounwind {
; X32-LABEL: test_mm256_load_pd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_load_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovaps (%rdi), %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast double* %a0 to <4 x double>*
  %res = load <4 x double>, <4 x double>* %arg0, align 32
  ret <4 x double> %res
}

define <8 x float> @test_mm256_load_ps(float* %a0) nounwind {
; X32-LABEL: test_mm256_load_ps:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_load_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovaps (%rdi), %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast float* %a0 to <8 x float>*
  %res = load <8 x float>, <8 x float>* %arg0, align 32
  ret <8 x float> %res
}

define <4 x i64> @test_mm256_load_si256(<4 x i64>* %a0) nounwind {
; X32-LABEL: test_mm256_load_si256:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_load_si256:
; X64:       # BB#0:
; X64-NEXT:    vmovaps (%rdi), %ymm0
; X64-NEXT:    retq
  %res = load <4 x i64>, <4 x i64>* %a0, align 32
  ret <4 x i64> %res
}

define <4 x double> @test_mm256_loadu_pd(double* %a0) nounwind {
; X32-LABEL: test_mm256_loadu_pd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovups (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_loadu_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovups (%rdi), %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast double* %a0 to <4 x double>*
  %res = load <4 x double>, <4 x double>* %arg0, align 1
  ret <4 x double> %res
}

define <8 x float> @test_mm256_loadu_ps(float* %a0) nounwind {
; X32-LABEL: test_mm256_loadu_ps:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovups (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_loadu_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovups (%rdi), %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast float* %a0 to <8 x float>*
  %res = load <8 x float>, <8 x float>* %arg0, align 1
  ret <8 x float> %res
}

define <4 x i64> @test_mm256_loadu_si256(<4 x i64>* %a0) nounwind {
; X32-LABEL: test_mm256_loadu_si256:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovups (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_loadu_si256:
; X64:       # BB#0:
; X64-NEXT:    vmovups (%rdi), %ymm0
; X64-NEXT:    retq
  %res = load <4 x i64>, <4 x i64>* %a0, align 1
  ret <4 x i64> %res
}

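; The loadu2 tests combine two unaligned 128-bit loads: a vmovups for one half
; and a load folded into vinsertf128 for the other.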
define <8 x float> @test_mm256_loadu2_m128(float* %a0, float* %a1) nounwind {
; X32-LABEL: test_mm256_loadu2_m128:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovups (%eax), %xmm0
; X32-NEXT:    vinsertf128 $1, (%ecx), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_loadu2_m128:
; X64:       # BB#0:
; X64-NEXT:    vmovups (%rsi), %xmm0
; X64-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %hi4 = load <4 x float>, <4 x float>* %arg0, align 1
  %hi8 = shufflevector <4 x float> %hi4, <4 x float> %hi4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %arg1 = bitcast float* %a1 to <4 x float>*
  %lo4 = load <4 x float>, <4 x float>* %arg1, align 1
  %lo8 = shufflevector <4 x float> %lo4, <4 x float> %lo4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x float> %lo8, <8 x float> %hi8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_loadu2_m128d(double* %a0, double* %a1) nounwind {
; X32-LABEL: test_mm256_loadu2_m128d:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovups (%eax), %xmm0
; X32-NEXT:    vinsertf128 $1, (%ecx), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_loadu2_m128d:
; X64:       # BB#0:
; X64-NEXT:    vmovups (%rsi), %xmm0
; X64-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %hi2 = load <2 x double>, <2 x double>* %arg0, align 1
  %hi4 = shufflevector <2 x double> %hi2, <2 x double> %hi2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %arg1 = bitcast double* %a1 to <2 x double>*
  %lo2 = load <2 x double>, <2 x double>* %arg1, align 1
  %lo4 = shufflevector <2 x double> %lo2, <2 x double> %lo2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x double> %lo4, <4 x double> %hi4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %res
}

define <4 x i64> @test_mm256_loadu2_m128i(i64* %a0, i64* %a1) nounwind {
; X32-LABEL: test_mm256_loadu2_m128i:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovups (%eax), %xmm0
; X32-NEXT:    vinsertf128 $1, (%ecx), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_loadu2_m128i:
; X64:       # BB#0:
; X64-NEXT:    vmovups (%rsi), %xmm0
; X64-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to <2 x i64>*
  %hi2 = load <2 x i64>, <2 x i64>* %arg0, align 1
  %hi4 = shufflevector <2 x i64> %hi2, <2 x i64> %hi2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %arg1 = bitcast i64* %a1 to <2 x i64>*
  %lo2 = load <2 x i64>, <2 x i64>* %arg1, align 1
  %lo4 = shufflevector <2 x i64> %lo2, <2 x i64> %lo2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i64> %lo4, <4 x i64> %hi4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %res
}

define <2 x double> @test_mm_maskload_pd(double* %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_maskload_pd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskload_pd:
; X64:       # BB#0:
; X64-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast double* %a0 to i8*
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %arg0, <2 x i64> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readnone

define <4 x double> @test_mm256_maskload_pd(double* %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_maskload_pd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskload_pd:
; X64:       # BB#0:
; X64-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast double* %a0 to i8*
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %arg0, <4 x i64> %a1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readnone

define <4 x float> @test_mm_maskload_ps(float* %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_maskload_ps:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskload_ps:
; X64:       # BB#0:
; X64-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast float* %a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %arg0, <4 x i32> %arg1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readnone

|
|
|
|
|
|
|
|
define <8 x float> @test_mm256_maskload_ps(float* %a0, <4 x i64> %a1) nounwind {
|
|
|
|
; X32-LABEL: test_mm256_maskload_ps:
|
|
|
|
; X32: # BB#0:
|
|
|
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
|
|
; X32-NEXT: vmaskmovps (%eax), %ymm0, %ymm0
|
|
|
|
; X32-NEXT: retl
|
|
|
|
;
|
|
|
|
; X64-LABEL: test_mm256_maskload_ps:
|
|
|
|
; X64: # BB#0:
|
|
|
|
; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0
|
|
|
|
; X64-NEXT: retq
|
|
|
|
%arg0 = bitcast float* %a0 to i8*
|
|
|
|
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
|
|
|
|
%res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %arg0, <8 x i32> %arg1)
|
|
|
|
ret <8 x float> %res
|
|
|
|
}
|
|
|
|
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readnone
|
|
|
|
|
|
|
|
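
; maskstore: vmaskmov writes only the lanes whose mask element has its sign bit
; set; memory under unselected lanes is left untouched.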
define void @test_mm_maskstore_pd(double* %a0, <2 x i64> %a1, <2 x double> %a2) nounwind {
; X32-LABEL: test_mm_maskstore_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskstore_pd:
; X64: # BB#0:
; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi)
; X64-NEXT: retq
%arg0 = bitcast double* %a0 to i8*
call void @llvm.x86.avx.maskstore.pd(i8* %arg0, <2 x i64> %a1, <2 x double> %a2)
ret void
}
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind readnone

define void @test_mm256_maskstore_pd(double* %a0, <4 x i64> %a1, <4 x double> %a2) nounwind {
; X32-LABEL: test_mm256_maskstore_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskstore_pd:
; X64: # BB#0:
; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%arg0 = bitcast double* %a0 to i8*
call void @llvm.x86.avx.maskstore.pd.256(i8* %arg0, <4 x i64> %a1, <4 x double> %a2)
ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind readnone

define void @test_mm_maskstore_ps(float* %a0, <2 x i64> %a1, <4 x float> %a2) nounwind {
; X32-LABEL: test_mm_maskstore_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmaskmovps %xmm1, %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskstore_ps:
; X64: # BB#0:
; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi)
; X64-NEXT: retq
%arg0 = bitcast float* %a0 to i8*
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
call void @llvm.x86.avx.maskstore.ps(i8* %arg0, <4 x i32> %arg1, <4 x float> %a2)
ret void
}
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind readnone

define void @test_mm256_maskstore_ps(float* %a0, <4 x i64> %a1, <8 x float> %a2) nounwind {
; X32-LABEL: test_mm256_maskstore_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmaskmovps %ymm1, %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskstore_ps:
; X64: # BB#0:
; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%arg0 = bitcast float* %a0 to i8*
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
call void @llvm.x86.avx.maskstore.ps.256(i8* %arg0, <8 x i32> %arg1, <8 x float> %a2)
ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind readnone
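
; max/min stay as target intrinsics rather than generic fcmp/select so the x86
; NaN and signed-zero ordering semantics of vmaxp*/vminp* are preserved.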
define <4 x double> @test_mm256_max_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_max_pd:
; X32: # BB#0:
; X32-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_max_pd:
; X64: # BB#0:
; X64-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_mm256_max_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_max_ps:
; X32: # BB#0:
; X32-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_max_ps:
; X64: # BB#0:
; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_mm256_min_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_min_pd:
; X32: # BB#0:
; X32-NEXT: vminpd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_min_pd:
; X64: # BB#0:
; X64-NEXT: vminpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_mm256_min_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_min_ps:
; X32: # BB#0:
; X32-NEXT: vminps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_min_ps:
; X64: # BB#0:
; X64-NEXT: vminps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
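
; The *dup intrinsics are expressed as plain shuffles; the checks verify the
; dedicated vmovddup/vmovshdup/vmovsldup forms are still selected.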
define <4 x double> @test_mm256_movedup_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_movedup_pd:
; X32: # BB#0:
; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_movedup_pd:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT: retq
%res = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x double> %res
}

define <8 x float> @test_mm256_movehdup_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_movehdup_ps:
; X32: # BB#0:
; X32-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_movehdup_ps:
; X64: # BB#0:
; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: retq
%res = shufflevector <8 x float> %a0, <8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x float> %res
}

define <8 x float> @test_mm256_moveldup_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_moveldup_ps:
; X32: # BB#0:
; X32-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_moveldup_ps:
; X64: # BB#0:
; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: retq
%res = shufflevector <8 x float> %a0, <8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x float> %res
}
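
; movemask packs the sign bit of each lane into the low bits of a
; general-purpose register.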
define i32 @test_mm256_movemask_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_movemask_pd:
; X32: # BB#0:
; X32-NEXT: vmovmskpd %ymm0, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_movemask_pd:
; X64: # BB#0:
; X64-NEXT: vmovmskpd %ymm0, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone

define i32 @test_mm256_movemask_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_movemask_ps:
; X32: # BB#0:
; X32-NEXT: vmovmskps %ymm0, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_movemask_ps:
; X64: # BB#0:
; X64-NEXT: vmovmskps %ymm0, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone

define <4 x double> @test_mm256_mul_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_mul_pd:
; X32: # BB#0:
; X32-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mul_pd:
; X64: # BB#0:
; X64-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = fmul <4 x double> %a0, %a1
ret <4 x double> %res
}

define <8 x float> @test_mm256_mul_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_mul_ps:
; X32: # BB#0:
; X32-NEXT: vmulps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mul_ps:
; X64: # BB#0:
; X64-NEXT: vmulps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = fmul <8 x float> %a0, %a1
ret <8 x float> %res
}

define <4 x double> @test_mm256_or_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_or_pd:
; X32: # BB#0:
; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_or_pd:
; X64: # BB#0:
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%1 = bitcast <4 x double> %a0 to <4 x i64>
%2 = bitcast <4 x double> %a1 to <4 x i64>
%res = or <4 x i64> %1, %2
%bc = bitcast <4 x i64> %res to <4 x double>
ret <4 x double> %bc
}

define <8 x float> @test_mm256_or_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_or_ps:
; X32: # BB#0:
; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_or_ps:
; X64: # BB#0:
; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%1 = bitcast <8 x float> %a0 to <8 x i32>
%2 = bitcast <8 x float> %a1 to <8 x i32>
%res = or <8 x i32> %1, %2
%bc = bitcast <8 x i32> %res to <8 x float>
ret <8 x float> %bc
}
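
; Immediate-selector permutes are represented as shufflevectors with constant
; masks and should fold back to a single vpermilpd/vpermilps.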
define <2 x double> @test_mm_permute_pd(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_permute_pd:
; X32: # BB#0:
; X32-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_permute_pd:
; X64: # BB#0:
; X64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> %a0, <2 x i32> <i32 1, i32 0>
ret <2 x double> %res
}

define <4 x double> @test_mm256_permute_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_permute_pd:
; X32: # BB#0:
; X32-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permute_pd:
; X64: # BB#0:
; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-NEXT: retq
%res = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x double> %res
}

define <4 x float> @test_mm_permute_ps(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_permute_ps:
; X32: # BB#0:
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_permute_ps:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-NEXT: retq
%res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %res
}

define <4 x float> @test2_mm_permute_ps(<4 x float> %a0) nounwind {
; X32-LABEL: test2_mm_permute_ps:
; X32: # BB#0:
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test2_mm_permute_ps:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,3]
; X64-NEXT: retq
%res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
ret <4 x float> %res
}

define <8 x float> @test_mm256_permute_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_permute_ps:
; X32: # BB#0:
; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permute_ps:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; X64-NEXT: retq
%res = shufflevector <8 x float> %a0, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %res
}
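
; permute2f128 selects 128-bit halves; a shuffle half taken from zeroinitializer
; maps onto the instruction's lane-zeroing encoding.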
define <4 x double> @test_mm256_permute2f128_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_permute2f128_pd:
; X32: # BB#0:
; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm1[0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permute2f128_pd:
; X64: # BB#0:
; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm1[0,1]
; X64-NEXT: retq
%res = shufflevector <4 x double> zeroinitializer, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

; PR26667
define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_permute2f128_ps:
; X32: # BB#0:
; X32-NEXT: vmovaps %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permute2f128_ps:
; X64: # BB#0:
; X64-NEXT: vmovaps %ymm1, %ymm0
; X64-NEXT: retq
%res = shufflevector <8 x float> %a1, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

define <4 x i64> @test_mm256_permute2f128_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_permute2f128_si256:
; X32: # BB#0:
; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permute2f128_si256:
; X64: # BB#0:
; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; X64-NEXT: retq
%1 = bitcast <4 x i64> %a0 to <8 x i32>
%2 = bitcast <4 x i64> %a1 to <8 x i32>
%res = shufflevector <8 x i32> %2, <8 x i32> %2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%bc = bitcast <8 x i32> %res to <4 x i64>
ret <4 x i64> %bc
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
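
; permutevar takes its selector in a vector register, so these remain intrinsic
; calls rather than constant shuffles.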
define <2 x double> @test_mm_permutevar_pd(<2 x double> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_permutevar_pd:
; X32: # BB#0:
; X32-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_permutevar_pd:
; X64: # BB#0:
; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
%res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone

define <4 x double> @test_mm256_permutevar_pd(<4 x double> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_permutevar_pd:
; X32: # BB#0:
; X32-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permutevar_pd:
; X64: # BB#0:
; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone

define <4 x float> @test_mm_permutevar_ps(<4 x float> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_permutevar_ps:
; X32: # BB#0:
; X32-NEXT: vpermilps %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_permutevar_ps:
; X64: # BB#0:
; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %arg1)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone

define <8 x float> @test_mm256_permutevar_ps(<8 x float> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_permutevar_ps:
; X32: # BB#0:
; X32-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permutevar_ps:
; X64: # BB#0:
; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
%res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %arg1)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
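
; rcp and rsqrt are hardware approximation instructions with no generic IR
; equivalent, so they stay as target intrinsics.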
define <8 x float> @test_mm256_rcp_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_rcp_ps:
; X32: # BB#0:
; X32-NEXT: vrcpps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_rcp_ps:
; X64: # BB#0:
; X64-NEXT: vrcpps %ymm0, %ymm0
; X64-NEXT: retq
%res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
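
; Rounding immediate 4 is _MM_FROUND_CUR_DIRECTION, i.e. round using the
; current MXCSR rounding mode.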
define <4 x double> @test_mm256_round_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_round_pd:
; X32: # BB#0:
; X32-NEXT: vroundpd $4, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_round_pd:
; X64: # BB#0:
; X64-NEXT: vroundpd $4, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 4)
ret <4 x double> %res
}

define <8 x float> @test_mm256_round_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_round_ps:
; X32: # BB#0:
; X32-NEXT: vroundps $4, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_round_ps:
; X64: # BB#0:
; X64-NEXT: vroundps $4, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 4)
ret <8 x float> %res
}

define <8 x float> @test_mm256_rsqrt_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_rsqrt_ps:
; X32: # BB#0:
; X32-NEXT: vrsqrtps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_rsqrt_ps:
; X64: # BB#0:
; X64-NEXT: vrsqrtps %ymm0, %ymm0
; X64-NEXT: retq
%res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
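
; The set_epi* tests build vectors one insertelement at a time; arguments are
; supplied highest element first, matching the _mm256_set_* intrinsics.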
define <4 x i64> @test_mm256_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind {
; X32-LABEL: test_mm256_set_epi8:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovd %ecx, %xmm1
; X32-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_epi8:
; X64: # BB#0:
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl %r9b, %eax
; X64-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl %dl, %eax
; X64-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; X64-NEXT: vmovd %ecx, %xmm1
; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <32 x i8> undef, i8 %a31, i32 0
%res1 = insertelement <32 x i8> %res0, i8 %a30, i32 1
%res2 = insertelement <32 x i8> %res1, i8 %a29, i32 2
%res3 = insertelement <32 x i8> %res2, i8 %a28, i32 3
%res4 = insertelement <32 x i8> %res3, i8 %a27, i32 4
%res5 = insertelement <32 x i8> %res4, i8 %a26, i32 5
%res6 = insertelement <32 x i8> %res5, i8 %a25, i32 6
%res7 = insertelement <32 x i8> %res6, i8 %a24, i32 7
%res8 = insertelement <32 x i8> %res7, i8 %a23, i32 8
%res9 = insertelement <32 x i8> %res8, i8 %a22, i32 9
%res10 = insertelement <32 x i8> %res9, i8 %a21, i32 10
%res11 = insertelement <32 x i8> %res10, i8 %a20, i32 11
%res12 = insertelement <32 x i8> %res11, i8 %a19, i32 12
%res13 = insertelement <32 x i8> %res12, i8 %a18, i32 13
%res14 = insertelement <32 x i8> %res13, i8 %a17, i32 14
%res15 = insertelement <32 x i8> %res14, i8 %a16, i32 15
%res16 = insertelement <32 x i8> %res15, i8 %a15, i32 16
%res17 = insertelement <32 x i8> %res16, i8 %a14, i32 17
%res18 = insertelement <32 x i8> %res17, i8 %a13, i32 18
%res19 = insertelement <32 x i8> %res18, i8 %a12, i32 19
%res20 = insertelement <32 x i8> %res19, i8 %a11, i32 20
%res21 = insertelement <32 x i8> %res20, i8 %a10, i32 21
%res22 = insertelement <32 x i8> %res21, i8 %a9 , i32 22
%res23 = insertelement <32 x i8> %res22, i8 %a8 , i32 23
%res24 = insertelement <32 x i8> %res23, i8 %a7 , i32 24
%res25 = insertelement <32 x i8> %res24, i8 %a6 , i32 25
%res26 = insertelement <32 x i8> %res25, i8 %a5 , i32 26
%res27 = insertelement <32 x i8> %res26, i8 %a4 , i32 27
%res28 = insertelement <32 x i8> %res27, i8 %a3 , i32 28
%res29 = insertelement <32 x i8> %res28, i8 %a2 , i32 29
%res30 = insertelement <32 x i8> %res29, i8 %a1 , i32 30
%res31 = insertelement <32 x i8> %res30, i8 %a0 , i32 31
%res = bitcast <32 x i8> %res31 to <4 x i64>
ret <4 x i64> %res
}

define <4 x i64> @test_mm256_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
; X32-LABEL: test_mm256_set_epi16:
; X32: # BB#0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovd %eax, %xmm1
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_epi16:
; X64: # BB#0:
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; X64-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0
; X64-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0
; X64-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
; X64-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0
; X64-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <16 x i16> undef, i16 %a15, i32 0
%res1 = insertelement <16 x i16> %res0, i16 %a14, i32 1
%res2 = insertelement <16 x i16> %res1, i16 %a13, i32 2
%res3 = insertelement <16 x i16> %res2, i16 %a12, i32 3
%res4 = insertelement <16 x i16> %res3, i16 %a11, i32 4
%res5 = insertelement <16 x i16> %res4, i16 %a10, i32 5
%res6 = insertelement <16 x i16> %res5, i16 %a9 , i32 6
%res7 = insertelement <16 x i16> %res6, i16 %a8 , i32 7
%res8 = insertelement <16 x i16> %res7, i16 %a7 , i32 8
%res9 = insertelement <16 x i16> %res8, i16 %a6 , i32 9
%res10 = insertelement <16 x i16> %res9, i16 %a5 , i32 10
%res11 = insertelement <16 x i16> %res10, i16 %a4 , i32 11
%res12 = insertelement <16 x i16> %res11, i16 %a3 , i32 12
%res13 = insertelement <16 x i16> %res12, i16 %a2 , i32 13
%res14 = insertelement <16 x i16> %res13, i16 %a1 , i32 14
%res15 = insertelement <16 x i16> %res14, i16 %a0 , i32 15
%res = bitcast <16 x i16> %res15 to <4 x i64>
ret <4 x i64> %res
}
define <4 x i64> @test_mm256_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
; X32-LABEL: test_mm256_set_epi32:
; X32: # BB#0:
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_epi32:
; X64: # BB#0:
; X64-NEXT: vmovd %ecx, %xmm0
; X64-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
; X64-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0
; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
; X64-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; X64-NEXT: vpinsrd $2, %r9d, %xmm1, %xmm1
; X64-NEXT: vpinsrd $3, %r8d, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <8 x i32> undef, i32 %a7, i32 0
%res1 = insertelement <8 x i32> %res0, i32 %a6, i32 1
%res2 = insertelement <8 x i32> %res1, i32 %a5, i32 2
%res3 = insertelement <8 x i32> %res2, i32 %a4, i32 3
%res4 = insertelement <8 x i32> %res3, i32 %a3, i32 4
%res5 = insertelement <8 x i32> %res4, i32 %a2, i32 5
%res6 = insertelement <8 x i32> %res5, i32 %a1, i32 6
%res7 = insertelement <8 x i32> %res6, i32 %a0, i32 7
%res = bitcast <8 x i32> %res7 to <4 x i64>
ret <4 x i64> %res
}

define <4 x i64> @test_mm256_set_epi64x(i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind {
; X32-LABEL: test_mm256_set_epi64x:
; X32: # BB#0:
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_epi64x:
; X64: # BB#0:
; X64-NEXT: vmovq %rdi, %xmm0
; X64-NEXT: vmovq %rsi, %xmm1
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT: vmovq %rdx, %xmm1
; X64-NEXT: vmovq %rcx, %xmm2
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <4 x i64> undef, i64 %a3, i32 0
%res1 = insertelement <4 x i64> %res0, i64 %a2, i32 1
%res2 = insertelement <4 x i64> %res1, i64 %a1, i32 2
%res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3
ret <4 x i64> %res3
}

define <8 x float> @test_mm256_set_m128(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm256_set_m128:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_m128:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res = shufflevector <4 x float> %a1, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x float> %res
}

define <4 x double> @test_mm256_set_m128d(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm256_set_m128d:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_m128d:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x double> %a0 to <4 x float>
%arg1 = bitcast <2 x double> %a1 to <4 x float>
%res = shufflevector <4 x float> %arg1, <4 x float> %arg0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%bc = bitcast <8 x float> %res to <4 x double>
ret <4 x double> %bc
}

define <4 x i64> @test_mm256_set_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_set_m128i:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_m128i:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x float>
%arg1 = bitcast <2 x i64> %a1 to <4 x float>
%res = shufflevector <4 x float> %arg1, <4 x float> %arg0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%bc = bitcast <8 x float> %res to <4 x i64>
ret <4 x i64> %bc
}

define <4 x double> @test_mm256_set_pd(double %a0, double %a1, double %a2, double %a3) nounwind {
; X32-LABEL: test_mm256_set_pd:
; X32: # BB#0:
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; X32-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_pd:
; X64: # BB#0:
; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <4 x double> undef, double %a3, i32 0
%res1 = insertelement <4 x double> %res0, double %a2, i32 1
%res2 = insertelement <4 x double> %res1, double %a1, i32 2
%res3 = insertelement <4 x double> %res2, double %a0, i32 3
ret <4 x double> %res3
}

define <8 x float> @test_mm256_set_ps(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind {
; X32-LABEL: test_mm256_set_ps:
; X32: # BB#0:
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; X32-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; X32-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; X32-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X32-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_ps:
; X64: # BB#0:
; X64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm7[0],xmm6[0],xmm7[2,3]
; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm5[0],xmm1[3]
; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <8 x float> undef, float %a7, i32 0
%res1 = insertelement <8 x float> %res0, float %a6, i32 1
%res2 = insertelement <8 x float> %res1, float %a5, i32 2
%res3 = insertelement <8 x float> %res2, float %a4, i32 3
%res4 = insertelement <8 x float> %res3, float %a3, i32 4
%res5 = insertelement <8 x float> %res4, float %a2, i32 5
%res6 = insertelement <8 x float> %res5, float %a1, i32 6
%res7 = insertelement <8 x float> %res6, float %a0, i32 7
ret <8 x float> %res7
}

define <4 x i64> @test_mm256_set1_epi8(i8 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi8:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set1_epi8:
; X64: # BB#0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <32 x i8> undef, i8 %a0, i32 0
%res1 = insertelement <32 x i8> %res0, i8 %a0, i32 1
%res2 = insertelement <32 x i8> %res1, i8 %a0, i32 2
%res3 = insertelement <32 x i8> %res2, i8 %a0, i32 3
%res4 = insertelement <32 x i8> %res3, i8 %a0, i32 4
%res5 = insertelement <32 x i8> %res4, i8 %a0, i32 5
%res6 = insertelement <32 x i8> %res5, i8 %a0, i32 6
%res7 = insertelement <32 x i8> %res6, i8 %a0, i32 7
%res8 = insertelement <32 x i8> %res7, i8 %a0, i32 8
%res9 = insertelement <32 x i8> %res8, i8 %a0, i32 9
%res10 = insertelement <32 x i8> %res9, i8 %a0, i32 10
%res11 = insertelement <32 x i8> %res10, i8 %a0, i32 11
%res12 = insertelement <32 x i8> %res11, i8 %a0, i32 12
%res13 = insertelement <32 x i8> %res12, i8 %a0, i32 13
%res14 = insertelement <32 x i8> %res13, i8 %a0, i32 14
%res15 = insertelement <32 x i8> %res14, i8 %a0, i32 15
%res16 = insertelement <32 x i8> %res15, i8 %a0, i32 16
%res17 = insertelement <32 x i8> %res16, i8 %a0, i32 17
%res18 = insertelement <32 x i8> %res17, i8 %a0, i32 18
%res19 = insertelement <32 x i8> %res18, i8 %a0, i32 19
%res20 = insertelement <32 x i8> %res19, i8 %a0, i32 20
%res21 = insertelement <32 x i8> %res20, i8 %a0, i32 21
%res22 = insertelement <32 x i8> %res21, i8 %a0, i32 22
%res23 = insertelement <32 x i8> %res22, i8 %a0, i32 23
%res24 = insertelement <32 x i8> %res23, i8 %a0, i32 24
%res25 = insertelement <32 x i8> %res24, i8 %a0, i32 25
%res26 = insertelement <32 x i8> %res25, i8 %a0, i32 26
%res27 = insertelement <32 x i8> %res26, i8 %a0, i32 27
%res28 = insertelement <32 x i8> %res27, i8 %a0, i32 28
%res29 = insertelement <32 x i8> %res28, i8 %a0, i32 29
%res30 = insertelement <32 x i8> %res29, i8 %a0, i32 30
%res31 = insertelement <32 x i8> %res30, i8 %a0, i32 31
%res = bitcast <32 x i8> %res31 to <4 x i64>
ret <4 x i64> %res
}

define <4 x i64> @test_mm256_set1_epi16(i16 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi16:
; X32: # BB#0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set1_epi16:
; X64: # BB#0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <16 x i16> undef, i16 %a0, i32 0
%res1 = insertelement <16 x i16> %res0, i16 %a0, i32 1
%res2 = insertelement <16 x i16> %res1, i16 %a0, i32 2
%res3 = insertelement <16 x i16> %res2, i16 %a0, i32 3
%res4 = insertelement <16 x i16> %res3, i16 %a0, i32 4
%res5 = insertelement <16 x i16> %res4, i16 %a0, i32 5
%res6 = insertelement <16 x i16> %res5, i16 %a0, i32 6
%res7 = insertelement <16 x i16> %res6, i16 %a0, i32 7
%res8 = insertelement <16 x i16> %res7, i16 %a0, i32 8
%res9 = insertelement <16 x i16> %res8, i16 %a0, i32 9
%res10 = insertelement <16 x i16> %res9, i16 %a0, i32 10
%res11 = insertelement <16 x i16> %res10, i16 %a0, i32 11
%res12 = insertelement <16 x i16> %res11, i16 %a0, i32 12
%res13 = insertelement <16 x i16> %res12, i16 %a0, i32 13
%res14 = insertelement <16 x i16> %res13, i16 %a0, i32 14
%res15 = insertelement <16 x i16> %res14, i16 %a0, i32 15
%res = bitcast <16 x i16> %res15 to <4 x i64>
ret <4 x i64> %res
}

define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi32:
; X32: # BB#0:
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set1_epi32:
; X64: # BB#0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <8 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <8 x i32> %res0, i32 %a0, i32 1
%res2 = insertelement <8 x i32> %res1, i32 %a0, i32 2
%res3 = insertelement <8 x i32> %res2, i32 %a0, i32 3
%res4 = insertelement <8 x i32> %res3, i32 %a0, i32 4
%res5 = insertelement <8 x i32> %res4, i32 %a0, i32 5
%res6 = insertelement <8 x i32> %res5, i32 %a0, i32 6
%res7 = insertelement <8 x i32> %res6, i32 %a0, i32 7
%res = bitcast <8 x i32> %res7 to <4 x i64>
ret <4 x i64> %res
}

define <4 x i64> @test_mm256_set1_epi64x(i64 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi64x:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set1_epi64x:
; X64: # BB#0:
; X64-NEXT: vmovq %rdi, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <4 x i64> undef, i64 %a0, i32 0
%res1 = insertelement <4 x i64> %res0, i64 %a0, i32 1
%res2 = insertelement <4 x i64> %res1, i64 %a0, i32 2
%res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3
ret <4 x i64> %res3
}

define <4 x double> @test_mm256_set1_pd(double %a0) nounwind {
; X32-LABEL: test_mm256_set1_pd:
; X32: # BB#0:
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set1_pd:
; X64: # BB#0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <4 x double> undef, double %a0, i32 0
%res1 = insertelement <4 x double> %res0, double %a0, i32 1
%res2 = insertelement <4 x double> %res1, double %a0, i32 2
%res3 = insertelement <4 x double> %res2, double %a0, i32 3
ret <4 x double> %res3
}

define <8 x float> @test_mm256_set1_ps(float %a0) nounwind {
; X32-LABEL: test_mm256_set1_ps:
; X32: # BB#0:
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set1_ps:
; X64: # BB#0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <8 x float> undef, float %a0, i32 0
%res1 = insertelement <8 x float> %res0, float %a0, i32 1
%res2 = insertelement <8 x float> %res1, float %a0, i32 2
%res3 = insertelement <8 x float> %res2, float %a0, i32 3
%res4 = insertelement <8 x float> %res3, float %a0, i32 4
%res5 = insertelement <8 x float> %res4, float %a0, i32 5
%res6 = insertelement <8 x float> %res5, float %a0, i32 6
%res7 = insertelement <8 x float> %res6, float %a0, i32 7
ret <8 x float> %res7
}

define <4 x i64> @test_mm256_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind {
; X32-LABEL: test_mm256_setr_epi8:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovd %ecx, %xmm1
; X32-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_epi8:
; X64: # BB#0:
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %esi
; X64-NEXT: vmovd %esi, %xmm1
; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl %dl, %eax
; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl %r9b, %eax
; X64-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = insertelement <32 x i8> undef, i8 %a0 , i32 0
%res1 = insertelement <32 x i8> %res0, i8 %a1 , i32 1
%res2 = insertelement <32 x i8> %res1, i8 %a2 , i32 2
%res3 = insertelement <32 x i8> %res2, i8 %a3 , i32 3
%res4 = insertelement <32 x i8> %res3, i8 %a4 , i32 4
%res5 = insertelement <32 x i8> %res4, i8 %a5 , i32 5
%res6 = insertelement <32 x i8> %res5, i8 %a6 , i32 6
%res7 = insertelement <32 x i8> %res6, i8 %a7 , i32 7
%res8 = insertelement <32 x i8> %res7, i8 %a8 , i32 8
%res9 = insertelement <32 x i8> %res8, i8 %a9 , i32 9
%res10 = insertelement <32 x i8> %res9, i8 %a10, i32 10
%res11 = insertelement <32 x i8> %res10, i8 %a11, i32 11
%res12 = insertelement <32 x i8> %res11, i8 %a12, i32 12
%res13 = insertelement <32 x i8> %res12, i8 %a13, i32 13
%res14 = insertelement <32 x i8> %res13, i8 %a14, i32 14
%res15 = insertelement <32 x i8> %res14, i8 %a15, i32 15
%res16 = insertelement <32 x i8> %res15, i8 %a16, i32 16
%res17 = insertelement <32 x i8> %res16, i8 %a17, i32 17
%res18 = insertelement <32 x i8> %res17, i8 %a18, i32 18
%res19 = insertelement <32 x i8> %res18, i8 %a19, i32 19
%res20 = insertelement <32 x i8> %res19, i8 %a20, i32 20
%res21 = insertelement <32 x i8> %res20, i8 %a21, i32 21
%res22 = insertelement <32 x i8> %res21, i8 %a22, i32 22
%res23 = insertelement <32 x i8> %res22, i8 %a23, i32 23
%res24 = insertelement <32 x i8> %res23, i8 %a24, i32 24
%res25 = insertelement <32 x i8> %res24, i8 %a25, i32 25
%res26 = insertelement <32 x i8> %res25, i8 %a26, i32 26
%res27 = insertelement <32 x i8> %res26, i8 %a27, i32 27
%res28 = insertelement <32 x i8> %res27, i8 %a28, i32 28
%res29 = insertelement <32 x i8> %res28, i8 %a29, i32 29
%res30 = insertelement <32 x i8> %res29, i8 %a30, i32 30
%res31 = insertelement <32 x i8> %res30, i8 %a31, i32 31
%res = bitcast <32 x i8> %res31 to <4 x i64>
ret <4 x i64> %res
}

define <4 x i64> @test_mm256_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
|
|
|
|
; X32-LABEL: test_mm256_setr_epi16:
|
|
|
|
; X32: # BB#0:
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vmovd %eax, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vmovd %eax, %xmm1
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X32-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
|
|
|
|
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
|
|
; X32-NEXT: retl
|
|
|
|
;
|
|
|
|
; X64-LABEL: test_mm256_setr_epi16:
|
|
|
|
; X64: # BB#0:
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X64-NEXT: vmovd %eax, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X64-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
Patch by Andrei Elovikov <andrei.elovikov@intel.com>
Differential Revision: https://reviews.llvm.org/D37559
llvm-svn: 313524
2017-09-18 18:17:59 +08:00
|
|
|
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
|
2016-05-21 00:05:55 +08:00
|
|
|
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
|
[X86FixupBWInsts] More precise register liveness if no <imp-use> on MOVs.
Summary:
Subregister liveness tracking is not implemented for X86 backend, so
sometimes the whole super register is said to be live, when only a
subregister is really live. That might happen if the def and the use
are located in different MBBs, see added fixup-bw-isnt.mir test.
However, using knowledge of the specific instructions handled by the
bw-fixup-pass we can get more precise liveness information which this
change does.
Reviewers: MatzeB, DavidKreitzer, ab, andrew.w.kaylor, craig.topper
Reviewed By: craig.topper
Subscribers: n.bozhenov, myatsina, llvm-commits, hiraditya
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; X64-NEXT: vmovd %edi, %xmm1
; X64-NEXT: vpinsrw $1, %esi, %xmm1, %xmm1
; X64-NEXT: vpinsrw $2, %edx, %xmm1, %xmm1
; X64-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
; X64-NEXT: vpinsrw $4, %r8d, %xmm1, %xmm1
; X64-NEXT: vpinsrw $5, %r9d, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
  %res0 = insertelement <16 x i16> undef, i16 %a0 , i32 0
  %res1 = insertelement <16 x i16> %res0, i16 %a1 , i32 1
  %res2 = insertelement <16 x i16> %res1, i16 %a2 , i32 2
  %res3 = insertelement <16 x i16> %res2, i16 %a3 , i32 3
  %res4 = insertelement <16 x i16> %res3, i16 %a4 , i32 4
  %res5 = insertelement <16 x i16> %res4, i16 %a5 , i32 5
  %res6 = insertelement <16 x i16> %res5, i16 %a6 , i32 6
  %res7 = insertelement <16 x i16> %res6, i16 %a7 , i32 7
  %res8 = insertelement <16 x i16> %res7, i16 %a8 , i32 8
  %res9 = insertelement <16 x i16> %res8, i16 %a9 , i32 9
  %res10 = insertelement <16 x i16> %res9, i16 %a10, i32 10
  %res11 = insertelement <16 x i16> %res10, i16 %a11, i32 11
  %res12 = insertelement <16 x i16> %res11, i16 %a12, i32 12
  %res13 = insertelement <16 x i16> %res12, i16 %a13, i32 13
  %res14 = insertelement <16 x i16> %res13, i16 %a14, i32 14
  %res15 = insertelement <16 x i16> %res14, i16 %a15, i32 15
  %res = bitcast <16 x i16> %res15 to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
; X32-LABEL: test_mm256_setr_epi32:
; X32: # BB#0:
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_epi32:
; X64: # BB#0:
; X64-NEXT: vmovd %r8d, %xmm0
; X64-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
; X64-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-NEXT: vmovd %edi, %xmm1
; X64-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
; X64-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
; X64-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
  %res0 = insertelement <8 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <8 x i32> %res0, i32 %a1, i32 1
  %res2 = insertelement <8 x i32> %res1, i32 %a2, i32 2
  %res3 = insertelement <8 x i32> %res2, i32 %a3, i32 3
  %res4 = insertelement <8 x i32> %res3, i32 %a4, i32 4
  %res5 = insertelement <8 x i32> %res4, i32 %a5, i32 5
  %res6 = insertelement <8 x i32> %res5, i32 %a6, i32 6
  %res7 = insertelement <8 x i32> %res6, i32 %a7, i32 7
  %res = bitcast <8 x i32> %res7 to <4 x i64>
  ret <4 x i64> %res
}

define <4 x i64> @test_mm256_setr_epi64x(i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind {
; X32-LABEL: test_mm256_setr_epi64x:
; X32: # BB#0:
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_epi64x:
; X64: # BB#0:
; X64-NEXT: vmovq %rcx, %xmm0
; X64-NEXT: vmovq %rdx, %xmm1
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT: vmovq %rsi, %xmm1
; X64-NEXT: vmovq %rdi, %xmm2
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
  %res0 = insertelement <4 x i64> undef, i64 %a0, i32 0
  %res1 = insertelement <4 x i64> %res0, i64 %a1, i32 1
  %res2 = insertelement <4 x i64> %res1, i64 %a2, i32 2
  %res3 = insertelement <4 x i64> %res2, i64 %a3, i32 3
  ret <4 x i64> %res3
}

define <8 x float> @test_mm256_setr_m128(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm256_setr_m128:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_m128:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_setr_m128d(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm256_setr_m128d:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_m128d:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x double> %a0 to <4 x float>
  %arg1 = bitcast <2 x double> %a1 to <4 x float>
  %res = shufflevector <4 x float> %arg0, <4 x float> %arg1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %bc = bitcast <8 x float> %res to <4 x double>
  ret <4 x double> %bc
}

define <4 x i64> @test_mm256_setr_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_setr_m128i:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_m128i:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x float>
  %arg1 = bitcast <2 x i64> %a1 to <4 x float>
  %res = shufflevector <4 x float> %arg0, <4 x float> %arg1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %bc = bitcast <8 x float> %res to <4 x i64>
  ret <4 x i64> %bc
}

define <4 x double> @test_mm256_setr_pd(double %a0, double %a1, double %a2, double %a3) nounwind {
; X32-LABEL: test_mm256_setr_pd:
; X32: # BB#0:
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X32-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_pd:
; X64: # BB#0:
; X64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT: retq
  %res0 = insertelement <4 x double> undef, double %a0, i32 0
  %res1 = insertelement <4 x double> %res0, double %a1, i32 1
  %res2 = insertelement <4 x double> %res1, double %a2, i32 2
  %res3 = insertelement <4 x double> %res2, double %a3, i32 3
  ret <4 x double> %res3
}

define <8 x float> @test_mm256_setr_ps(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind {
; X32-LABEL: test_mm256_setr_ps:
; X32: # BB#0:
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; X32-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm7[0],xmm6[0],xmm7[2,3]
; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm5[0],xmm1[3]
; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_ps:
; X64: # BB#0:
; X64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; X64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; X64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X64-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X64-NEXT: retq
  %res0 = insertelement <8 x float> undef, float %a0, i32 0
  %res1 = insertelement <8 x float> %res0, float %a1, i32 1
  %res2 = insertelement <8 x float> %res1, float %a2, i32 2
  %res3 = insertelement <8 x float> %res2, float %a3, i32 3
  %res4 = insertelement <8 x float> %res3, float %a4, i32 4
  %res5 = insertelement <8 x float> %res4, float %a5, i32 5
  %res6 = insertelement <8 x float> %res5, float %a6, i32 6
  %res7 = insertelement <8 x float> %res6, float %a7, i32 7
  ret <8 x float> %res7
}

define <4 x double> @test_mm256_setzero_pd() nounwind {
; X32-LABEL: test_mm256_setzero_pd:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setzero_pd:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  ret <4 x double> zeroinitializer
}

define <8 x float> @test_mm256_setzero_ps() nounwind {
; X32-LABEL: test_mm256_setzero_ps:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setzero_ps:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  ret <8 x float> zeroinitializer
}

define <4 x i64> @test_mm256_setzero_si256() nounwind {
; X32-LABEL: test_mm256_setzero_si256:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setzero_si256:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  ret <4 x i64> zeroinitializer
}

define <4 x double> @test_mm256_shuffle_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_shuffle_pd:
; X32: # BB#0:
; X32-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_shuffle_pd:
; X64: # BB#0:
; X64-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT: retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_shuffle_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_shuffle_ps:
; X32: # BB#0:
; X32-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_shuffle_ps:
; X64: # BB#0:
; X64-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
; X64-NEXT: retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_sqrt_pd(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_sqrt_pd:
; X32: # BB#0:
; X32-NEXT: vsqrtpd %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_sqrt_pd:
; X64: # BB#0:
; X64-NEXT: vsqrtpd %ymm0, %ymm0
; X64-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

define <8 x float> @test_mm256_sqrt_ps(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_sqrt_ps:
; X32: # BB#0:
; X32-NEXT: vsqrtps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_sqrt_ps:
; X64: # BB#0:
; X64-NEXT: vsqrtps %ymm0, %ymm0
; X64-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

define void @test_mm256_store_pd(double* %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_store_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_store_pd:
; X64: # BB#0:
; X64-NEXT: vmovaps %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <4 x double>*
  store <4 x double> %a1, <4 x double>* %arg0, align 32
  ret void
}

define void @test_mm256_store_ps(float* %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_store_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_store_ps:
; X64: # BB#0:
; X64-NEXT: vmovaps %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <8 x float>*
  store <8 x float> %a1, <8 x float>* %arg0, align 32
  ret void
}

define void @test_mm256_store_si256(<4 x i64>* %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_store_si256:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_store_si256:
; X64: # BB#0:
; X64-NEXT: vmovaps %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  store <4 x i64> %a1, <4 x i64>* %a0, align 32
  ret void
}

define void @test_mm256_storeu_pd(double* %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_storeu_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovups %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_storeu_pd:
; X64: # BB#0:
; X64-NEXT: vmovups %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <4 x double>*
  store <4 x double> %a1, <4 x double>* %arg0, align 1
  ret void
}

define void @test_mm256_storeu_ps(float* %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_storeu_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovups %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_storeu_ps:
; X64: # BB#0:
; X64-NEXT: vmovups %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <8 x float>*
  store <8 x float> %a1, <8 x float>* %arg0, align 1
  ret void
}

define void @test_mm256_storeu_si256(<4 x i64>* %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_storeu_si256:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovups %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_storeu_si256:
; X64: # BB#0:
; X64-NEXT: vmovups %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  store <4 x i64> %a1, <4 x i64>* %a0, align 1
  ret void
}

define void @test_mm256_storeu2_m128(float* %a0, float* %a1, <8 x float> %a2) nounwind {
; X32-LABEL: test_mm256_storeu2_m128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovups %xmm0, (%ecx)
; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-NEXT: vmovups %xmm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_storeu2_m128:
; X64: # BB#0:
; X64-NEXT: vmovups %xmm0, (%rdi)
; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
; X64-NEXT: vmovups %xmm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %lo = shufflevector <8 x float> %a2, <8 x float> %a2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x float> %lo, <4 x float>* %arg0, align 1
  %arg1 = bitcast float* %a1 to <4 x float>*
  %hi = shufflevector <8 x float> %a2, <8 x float> %a2, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  store <4 x float> %hi, <4 x float>* %arg1, align 1
  ret void
}

define void @test_mm256_storeu2_m128d(double* %a0, double* %a1, <4 x double> %a2) nounwind {
; X32-LABEL: test_mm256_storeu2_m128d:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovups %xmm0, (%ecx)
; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-NEXT: vmovups %xmm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_storeu2_m128d:
; X64: # BB#0:
; X64-NEXT: vmovups %xmm0, (%rdi)
; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
; X64-NEXT: vmovups %xmm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %lo = shufflevector <4 x double> %a2, <4 x double> %a2, <2 x i32> <i32 0, i32 1>
  store <2 x double> %lo, <2 x double>* %arg0, align 1
  %arg1 = bitcast double* %a1 to <2 x double>*
  %hi = shufflevector <4 x double> %a2, <4 x double> %a2, <2 x i32> <i32 2, i32 3>
  store <2 x double> %hi, <2 x double>* %arg1, align 1
  ret void
}

define void @test_mm256_storeu2_m128i(<2 x i64>* %a0, <2 x i64>* %a1, <4 x i64> %a2) nounwind {
; X32-LABEL: test_mm256_storeu2_m128i:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovups %xmm0, (%ecx)
; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
; X32-NEXT: vmovups %xmm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_storeu2_m128i:
; X64: # BB#0:
; X64-NEXT: vmovups %xmm0, (%rdi)
; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
; X64-NEXT: vmovups %xmm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64>* %a0 to <2 x i64>*
  %lo = shufflevector <4 x i64> %a2, <4 x i64> %a2, <2 x i32> <i32 0, i32 1>
  store <2 x i64> %lo, <2 x i64>* %arg0, align 1
  %arg1 = bitcast <2 x i64>* %a1 to <2 x i64>*
  %hi = shufflevector <4 x i64> %a2, <4 x i64> %a2, <2 x i32> <i32 2, i32 3>
  store <2 x i64> %hi, <2 x i64>* %arg1, align 1
  ret void
}

define void @test_mm256_stream_pd(double *%a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_stream_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovntps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_stream_pd:
; X64: # BB#0:
; X64-NEXT: vmovntps %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <4 x double>*
  store <4 x double> %a1, <4 x double>* %arg0, align 32, !nontemporal !0
  ret void
}

define void @test_mm256_stream_ps(float *%a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_stream_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovntps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_stream_ps:
; X64: # BB#0:
; X64-NEXT: vmovntps %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <8 x float>*
  store <8 x float> %a1, <8 x float>* %arg0, align 32, !nontemporal !0
  ret void
}

define void @test_mm256_stream_si256(<4 x i64> *%a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_stream_si256:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovntps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_stream_si256:
; X64: # BB#0:
; X64-NEXT: vmovntps %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  store <4 x i64> %a1, <4 x i64>* %a0, align 32, !nontemporal !0
  ret void
}

define <4 x double> @test_mm256_sub_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_sub_pd:
; X32: # BB#0:
; X32-NEXT: vsubpd %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_sub_pd:
; X64: # BB#0:
; X64-NEXT: vsubpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
  %res = fsub <4 x double> %a0, %a1
  ret <4 x double> %res
}

define <8 x float> @test_mm256_sub_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_sub_ps:
; X32: # BB#0:
; X32-NEXT: vsubps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_sub_ps:
; X64: # BB#0:
; X64-NEXT: vsubps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
  %res = fsub <8 x float> %a0, %a1
  ret <8 x float> %res
}

define i32 @test_mm_testc_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_testc_pd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %xmm1, %xmm0
; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testc_pd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %xmm1, %xmm0
; X64-NEXT: setb %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm256_testc_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_testc_pd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %ymm1, %ymm0
; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_pd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %ymm1, %ymm0
; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_mm_testc_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_testc_ps:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %xmm1, %xmm0
; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testc_ps:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %xmm1, %xmm0
; X64-NEXT: setb %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm256_testc_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_testc_ps:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %ymm1, %ymm0
; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_ps:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %ymm1, %ymm0
; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @test_mm256_testc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_testc_si256:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vptest %ymm1, %ymm0
; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_si256:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vptest %ymm1, %ymm0
; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone

define i32 @test_mm_testnzc_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_testnzc_pd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testnzc_pd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm256_testnzc_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_testnzc_pd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %ymm1, %ymm0
; X32-NEXT: seta %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testnzc_pd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %ymm1, %ymm0
; X64-NEXT: seta %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_mm_testnzc_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_testnzc_ps:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testnzc_ps:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm256_testnzc_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_testnzc_ps:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %ymm1, %ymm0
; X32-NEXT: seta %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testnzc_ps:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %ymm1, %ymm0
; X64-NEXT: seta %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @test_mm256_testnzc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_testnzc_si256:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vptest %ymm1, %ymm0
; X32-NEXT: seta %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testnzc_si256:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vptest %ymm1, %ymm0
; X64-NEXT: seta %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone

define i32 @test_mm_testz_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_testz_pd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %xmm1, %xmm0
; X32-NEXT: sete %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testz_pd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %xmm1, %xmm0
; X64-NEXT: sete %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm256_testz_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_testz_pd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %ymm1, %ymm0
; X32-NEXT: sete %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testz_pd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %ymm1, %ymm0
; X64-NEXT: sete %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_mm_testz_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_testz_ps:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %xmm1, %xmm0
; X32-NEXT: sete %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testz_ps:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %xmm1, %xmm0
; X64-NEXT: sete %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm256_testz_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_testz_ps:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %ymm1, %ymm0
; X32-NEXT: sete %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testz_ps:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %ymm1, %ymm0
; X64-NEXT: sete %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @test_mm256_testz_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_testz_si256:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vptest %ymm1, %ymm0
; X32-NEXT: sete %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testz_si256:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vptest %ymm1, %ymm0
; X64-NEXT: sete %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone

define <2 x double> @test_mm_undefined_pd() nounwind {
; X32-LABEL: test_mm_undefined_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64: # BB#0:
; X64-NEXT: retq
  ret <2 x double> undef
}

define <4 x double> @test_mm256_undefined_pd() nounwind {
; X32-LABEL: test_mm256_undefined_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_undefined_pd:
; X64: # BB#0:
; X64-NEXT: retq
  ret <4 x double> undef
}

define <8 x float> @test_mm256_undefined_ps() nounwind {
; X32-LABEL: test_mm256_undefined_ps:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_undefined_ps:
; X64: # BB#0:
; X64-NEXT: retq
  ret <8 x float> undef
}

define <4 x i64> @test_mm256_undefined_si256() nounwind {
; X32-LABEL: test_mm256_undefined_si256:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_undefined_si256:
; X64: # BB#0:
; X64-NEXT: retq
  ret <4 x i64> undef
}

define <4 x double> @test_mm256_unpackhi_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_unpackhi_pd:
; X32: # BB#0:
; X32-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpackhi_pd:
; X64: # BB#0:
; X64-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT: retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_unpackhi_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_unpackhi_ps:
; X32: # BB#0:
; X32-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpackhi_ps:
; X64: # BB#0:
; X64-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT: retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_unpacklo_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_unpacklo_pd:
; X32: # BB#0:
; X32-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpacklo_pd:
; X64: # BB#0:
; X64-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT: retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_unpacklo_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_unpacklo_ps:
; X32: # BB#0:
; X32-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpacklo_ps:
; X64: # BB#0:
; X64-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT: retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_xor_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_xor_pd:
; X32: # BB#0:
; X32-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_xor_pd:
; X64: # BB#0:
; X64-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
  %1 = bitcast <4 x double> %a0 to <4 x i64>
  %2 = bitcast <4 x double> %a1 to <4 x i64>
  %res = xor <4 x i64> %1, %2
  %bc = bitcast <4 x i64> %res to <4 x double>
  ret <4 x double> %bc
}

define <8 x float> @test_mm256_xor_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_xor_ps:
; X32: # BB#0:
; X32-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_xor_ps:
; X64: # BB#0:
; X64-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
  %1 = bitcast <8 x float> %a0 to <8 x i32>
  %2 = bitcast <8 x float> %a1 to <8 x i32>
  %res = xor <8 x i32> %1, %2
  %bc = bitcast <8 x i32> %res to <8 x float>
  ret <8 x float> %bc
}

define void @test_mm256_zeroall() nounwind {
; X32-LABEL: test_mm256_zeroall:
; X32: # BB#0:
; X32-NEXT: vzeroall
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_zeroall:
; X64: # BB#0:
; X64-NEXT: vzeroall
; X64-NEXT: retq
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind readnone

define void @test_mm256_zeroupper() nounwind {
; X32-LABEL: test_mm256_zeroupper:
; X32: # BB#0:
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_zeroupper:
; X64: # BB#0:
; X64-NEXT: vzeroupper
; X64-NEXT: retq
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind readnone

define <4 x double> @test_mm256_zextpd128_pd256(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm256_zextpd128_pd256:
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_zextpd128_pd256:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_zextps128_ps256(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm256_zextps128_ps256:
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_zextps128_ps256:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
; X64-NEXT: retq
  %res = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}

define <4 x i64> @test_mm256_zextsi128_si256(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_zextsi128_si256:
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_zextsi128_si256:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %res
}

!0 = !{i32 1}