forked from OSchip/llvm-project
[X86][SSE] Regenerated the vec_extract tests.
llvm-svn: 265183
This commit is contained in:
parent
66b1bb45b5
commit
a372a0f295
|
@ -1,36 +1,108 @@
|
|||
; RUN: llc < %s -march=x86-64
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
|
||||
|
||||
; Sign-extend-in-register on a 256-bit vector: each i32 lane is truncated to
; i16 and sign-extended back to i32. The assertions below expect this to become
; a shift-left-by-16 / arithmetic-shift-right-by-16 pair per lane rather than a
; real narrow-and-widen sequence:
;  - SSE2 run: one shift pair on each of the two xmm halves (xmm0 and xmm1).
;  - AVX1 run: the ymm value is split with vextractf128, each 128-bit half is
;    shifted, and the halves are rejoined with vinsertf128.
;  - AVX2 run: a single shift pair directly on ymm0.
define <8 x i32> @a(<8 x i32> %a) nounwind {
|
||||
; SSE-LABEL: a:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: pslld $16, %xmm0
|
||||
; SSE-NEXT: psrad $16, %xmm0
|
||||
; SSE-NEXT: pslld $16, %xmm1
|
||||
; SSE-NEXT: psrad $16, %xmm1
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: a:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: a:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrad $16, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%b = trunc <8 x i32> %a to <8 x i16>
|
||||
%c = sext <8 x i16> %b to <8 x i32>
|
||||
ret <8 x i32> %c
|
||||
}
|
||||
|
||||
; Same truncate-to-i16 / sign-extend-to-i32 pattern as a 3-element vector
; (a non-power-of-two element count). The assertions expect the value to be
; handled in a single xmm register with one shift-left-16 /
; arithmetic-shift-right-16 pair, in both the SSE2 run and the two AVX runs
; (which share the common AVX prefix here).
define <3 x i32> @b(<3 x i32> %a) nounwind {
|
||||
; SSE-LABEL: b:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: pslld $16, %xmm0
|
||||
; SSE-NEXT: psrad $16, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: b:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpslld $16, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%b = trunc <3 x i32> %a to <3 x i16>
|
||||
%c = sext <3 x i16> %b to <3 x i32>
|
||||
ret <3 x i32> %c
|
||||
}
|
||||
|
||||
; Single-element case of the truncate-to-i16 / sign-extend-to-i32 pattern.
; Every RUN configuration shares one set of assertions (the common prefix):
; the expected code is a scalar movswl from %di into %eax, with no vector
; instructions at all.
define <1 x i32> @c(<1 x i32> %a) nounwind {
|
||||
; ALL-LABEL: c:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: movswl %di, %eax
|
||||
; ALL-NEXT: retq
|
||||
%b = trunc <1 x i32> %a to <1 x i16>
|
||||
%c = sext <1 x i16> %b to <1 x i32>
|
||||
ret <1 x i32> %c
|
||||
}
|
||||
|
||||
; Zero-extend-in-register on a 256-bit vector: each i32 lane is truncated to
; i16 and zero-extended back to i32, i.e. the upper 16 bits of every lane are
; cleared. Expected lowering per configuration:
;  - SSE2 run: load the mask [65535,0,...] (shown as 16-bit elements) once into
;    xmm2 and AND it into both xmm halves.
;  - AVX1 run: one 256-bit vandps against a RIP-relative memory operand (the
;    operand's contents are not pinned by the assertion).
;  - AVX2 run: zero ymm1 with vpxor, then vpblendw so that even 16-bit elements
;    come from the input and odd 16-bit elements come from the zero register.
define <8 x i32> @d(<8 x i32> %a) nounwind {
|
||||
; SSE-LABEL: d:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
|
||||
; SSE-NEXT: andps %xmm2, %xmm0
|
||||
; SSE-NEXT: andps %xmm2, %xmm1
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: d:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: d:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; AVX2-NEXT: retq
|
||||
%b = trunc <8 x i32> %a to <8 x i16>
|
||||
%c = zext <8 x i16> %b to <8 x i32>
|
||||
ret <8 x i32> %c
|
||||
}
|
||||
|
||||
; Truncate-to-i16 / zero-extend-to-i32 on a 3-element vector (a
; non-power-of-two element count), handled in a single xmm register.
; Expected lowering:
;  - SSE2 run: one andps against a RIP-relative memory operand (contents not
;    pinned by the assertion).
;  - AVX runs (shared prefix): zero xmm1 with vpxor, then vpblendw. The blend
;    takes 16-bit elements 1, 3 and 5 from the zero register and leaves
;    elements 6-7 (the fourth 32-bit lane, which is not part of the 3-element
;    result) as they were in the input.
define <3 x i32> @e(<3 x i32> %a) nounwind {
|
||||
; SSE-LABEL: e:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: e:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
|
||||
; AVX-NEXT: retq
|
||||
%b = trunc <3 x i32> %a to <3 x i16>
|
||||
%c = zext <3 x i16> %b to <3 x i32>
|
||||
ret <3 x i32> %c
|
||||
}
|
||||
|
||||
define <1 x i32> @f(<1 x i32> %a) nounwind {
|
||||
; ALL-LABEL: f:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: movzwl %di, %eax
|
||||
; ALL-NEXT: retq
|
||||
%b = trunc <1 x i32> %a to <1 x i16>
|
||||
%c = zext <1 x i16> %b to <1 x i32>
|
||||
ret <1 x i32> %c
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
|
||||
|
||||
; When extracting multiple consecutive elements from a larger
|
||||
; vector into a smaller one, do it efficiently. We should use
|
||||
|
@ -8,11 +9,18 @@
|
|||
|
||||
; Extracting the low elements only requires using the right kind of store.
|
||||
define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
|
||||
; CHECK-LABEL: low_v8f32_to_v4f32:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: low_v8f32_to_v4f32:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovaps %xmm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: low_v8f32_to_v4f32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ext0 = extractelement <8 x float> %v, i32 0
|
||||
%ext1 = extractelement <8 x float> %v, i32 1
|
||||
%ext2 = extractelement <8 x float> %v, i32 2
|
||||
|
@ -27,11 +35,18 @@ define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
|
|||
|
||||
; Extracting the high elements requires just one AVX instruction.
|
||||
define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
|
||||
; CHECK-LABEL: high_v8f32_to_v4f32:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: high_v8f32_to_v4f32:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: high_v8f32_to_v4f32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ext0 = extractelement <8 x float> %v, i32 4
|
||||
%ext1 = extractelement <8 x float> %v, i32 5
|
||||
%ext2 = extractelement <8 x float> %v, i32 6
|
||||
|
@ -48,11 +63,18 @@ define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
|
|||
; if we were actually using the vector in this function and
|
||||
; have AVX2, we should generate vextracti128 (the int version).
|
||||
define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
|
||||
; CHECK-LABEL: high_v8i32_to_v4i32:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: high_v8i32_to_v4i32:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: high_v8i32_to_v4i32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ext0 = extractelement <8 x i32> %v, i32 4
|
||||
%ext1 = extractelement <8 x i32> %v, i32 5
|
||||
%ext2 = extractelement <8 x i32> %v, i32 6
|
||||
|
@ -67,11 +89,18 @@ define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
|
|||
|
||||
; Make sure that element size doesn't alter the codegen.
|
||||
define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
|
||||
; CHECK-LABEL: high_v4f64_to_v2f64:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: high_v4f64_to_v2f64:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: high_v4f64_to_v2f64:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ext0 = extractelement <4 x double> %v, i32 2
|
||||
%ext1 = extractelement <4 x double> %v, i32 3
|
||||
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
|
||||
|
@ -84,14 +113,25 @@ define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
|
|||
; FIXME - Ideally these should just call VMOVD/VMOVQ/VMOVSS/VMOVSD
|
||||
|
||||
define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
|
||||
; CHECK-LABEL: legal_vzmovl_2i32_8i32:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
|
||||
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vmovaps %ymm0, (%rsi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: legal_vzmovl_2i32_8i32:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
|
||||
; X32-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; X32-NEXT: vmovaps %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: legal_vzmovl_2i32_8i32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
|
||||
; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; X64-NEXT: vmovaps %ymm0, (%rsi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ld = load <2 x i32>, <2 x i32>* %in, align 8
|
||||
%ext = extractelement <2 x i32> %ld, i64 0
|
||||
%ins = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %ext, i64 0
|
||||
|
@ -100,14 +140,25 @@ define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
|
|||
}
|
||||
|
||||
define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
|
||||
; CHECK-LABEL: legal_vzmovl_2i64_4i64:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovupd (%rdi), %xmm0
|
||||
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; CHECK-NEXT: vmovapd %ymm0, (%rsi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: legal_vzmovl_2i64_4i64:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovupd (%ecx), %xmm0
|
||||
; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; X32-NEXT: vmovapd %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: legal_vzmovl_2i64_4i64:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vmovupd (%rdi), %xmm0
|
||||
; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; X64-NEXT: vmovapd %ymm0, (%rsi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ld = load <2 x i64>, <2 x i64>* %in, align 8
|
||||
%ext = extractelement <2 x i64> %ld, i64 0
|
||||
%ins = insertelement <4 x i64> <i64 undef, i64 0, i64 0, i64 0>, i64 %ext, i64 0
|
||||
|
@ -116,14 +167,23 @@ define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
|
|||
}
|
||||
|
||||
define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
|
||||
; CHECK-LABEL: legal_vzmovl_2f32_8f32:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vmovaps %ymm0, (%rsi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: legal_vzmovl_2f32_8f32:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vmovaps %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: legal_vzmovl_2f32_8f32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; X64-NEXT: vmovaps %ymm0, (%rsi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ld = load <2 x float>, <2 x float>* %in, align 8
|
||||
%ext = extractelement <2 x float> %ld, i64 0
|
||||
%ins = insertelement <8 x float> <float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
|
||||
|
@ -132,14 +192,25 @@ define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
|
|||
}
|
||||
|
||||
define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
|
||||
; CHECK-LABEL: legal_vzmovl_2f64_4f64:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovupd (%rdi), %xmm0
|
||||
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; CHECK-NEXT: vmovapd %ymm0, (%rsi)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retq
|
||||
; X32-LABEL: legal_vzmovl_2f64_4f64:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovupd (%ecx), %xmm0
|
||||
; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; X32-NEXT: vmovapd %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: legal_vzmovl_2f64_4f64:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: vmovupd (%rdi), %xmm0
|
||||
; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; X64-NEXT: vmovapd %ymm0, (%rsi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
%ld = load <2 x double>, <2 x double>* %in, align 8
|
||||
%ext = extractelement <2 x double> %ld, i64 0
|
||||
%ins = insertelement <4 x double> <double undef, double 0.0, double 0.0, double 0.0>, double %ext, i64 0
|
||||
|
|
|
@ -1,12 +1,35 @@
|
|||
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define i32 @test0(<1 x i64>* %v4) {
|
||||
; CHECK-LABEL: test0:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: pshufw $238, (%[[REG:[a-z]+]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %eax
|
||||
; CHECK-NEXT: addl $32, %eax
|
||||
; CHECK-NEXT: retq
|
||||
define i32 @test0(<1 x i64>* %v4) nounwind {
|
||||
; X32-LABEL: test0:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $24, %esp
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: movl (%eax), %ecx
|
||||
; X32-NEXT: movl 4(%eax), %eax
|
||||
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movl %ecx, (%esp)
|
||||
; X32-NEXT: pshufw $238, (%esp), %mm0 # mm0 = mem[2,3,2,3]
|
||||
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||
; X32-NEXT: movd %xmm0, %eax
|
||||
; X32-NEXT: addl $32, %eax
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test0:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: pshufw $238, (%rdi), %mm0 # mm0 = mem[2,3,2,3]
|
||||
; X64-NEXT: movd %mm0, %eax
|
||||
; X64-NEXT: addl $32, %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%v5 = load <1 x i64>, <1 x i64>* %v4, align 8
|
||||
%v12 = bitcast <1 x i64> %v5 to <4 x i16>
|
||||
|
@ -21,14 +44,32 @@ entry:
|
|||
ret i32 %v20
|
||||
}
|
||||
|
||||
define i32 @test1(i32* nocapture readonly %ptr) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: movd (%[[REG]]), %mm0
|
||||
; CHECK-NEXT: pshufw $232, %mm0, %mm0
|
||||
; CHECK-NEXT: movd %mm0, %eax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
define i32 @test1(i32* nocapture readonly %ptr) nounwind {
|
||||
; X32-LABEL: test1:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $16, %esp
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: movd (%eax), %mm0
|
||||
; X32-NEXT: pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
|
||||
; X32-NEXT: movq %mm0, (%esp)
|
||||
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||
; X32-NEXT: movd %xmm0, %eax
|
||||
; X32-NEXT: emms
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test1:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movd (%rdi), %mm0
|
||||
; X64-NEXT: pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
|
||||
; X64-NEXT: movd %mm0, %eax
|
||||
; X64-NEXT: emms
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = load i32, i32* %ptr, align 4
|
||||
%1 = insertelement <2 x i32> undef, i32 %0, i32 0
|
||||
|
@ -47,13 +88,30 @@ entry:
|
|||
ret i32 %12
|
||||
}
|
||||
|
||||
define i32 @test2(i32* nocapture readonly %ptr) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: pshufw $232, (%[[REG]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %eax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
define i32 @test2(i32* nocapture readonly %ptr) nounwind {
|
||||
; X32-LABEL: test2:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: pushl %ebp
|
||||
; X32-NEXT: movl %esp, %ebp
|
||||
; X32-NEXT: andl $-8, %esp
|
||||
; X32-NEXT: subl $16, %esp
|
||||
; X32-NEXT: movl 8(%ebp), %eax
|
||||
; X32-NEXT: pshufw $232, (%eax), %mm0 # mm0 = mem[0,2,2,3]
|
||||
; X32-NEXT: movq %mm0, (%esp)
|
||||
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||
; X32-NEXT: movd %xmm0, %eax
|
||||
; X32-NEXT: emms
|
||||
; X32-NEXT: movl %ebp, %esp
|
||||
; X32-NEXT: popl %ebp
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test2:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: pshufw $232, (%rdi), %mm0 # mm0 = mem[0,2,2,3]
|
||||
; X64-NEXT: movd %mm0, %eax
|
||||
; X64-NEXT: emms
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast i32* %ptr to x86_mmx*
|
||||
%1 = load x86_mmx, x86_mmx* %0, align 8
|
||||
|
|
|
@ -1,59 +1,79 @@
|
|||
; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 | FileCheck %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define void @t1(float* %R, <4 x float>* %P1) nounwind {
|
||||
; CHECK-LABEL: t1:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movss 12(%ecx), %xmm0
|
||||
; CHECK-NEXT: movss %xmm0, (%eax)
|
||||
; CHECK-NEXT: retl
|
||||
|
||||
%X = load <4 x float>, <4 x float>* %P1
|
||||
%tmp = extractelement <4 x float> %X, i32 3
|
||||
store float %tmp, float* %R
|
||||
ret void
|
||||
; X32-LABEL: t1:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: movss %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t1:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: movss %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
%X = load <4 x float>, <4 x float>* %P1
|
||||
%tmp = extractelement <4 x float> %X, i32 3
|
||||
store float %tmp, float* %R
|
||||
ret void
|
||||
}
|
||||
|
||||
define float @t2(<4 x float>* %P1) nounwind {
|
||||
; CHECK-LABEL: t2:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: pushl %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; CHECK-NEXT: movss %xmm0, (%esp)
|
||||
; CHECK-NEXT: flds (%esp)
|
||||
; CHECK-NEXT: popl %eax
|
||||
; CHECK-NEXT: retl
|
||||
|
||||
%X = load <4 x float>, <4 x float>* %P1
|
||||
%tmp = extractelement <4 x float> %X, i32 2
|
||||
ret float %tmp
|
||||
; X32-LABEL: t2:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; X32-NEXT: movss %xmm0, (%esp)
|
||||
; X32-NEXT: flds (%esp)
|
||||
; X32-NEXT: popl %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t2:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; X64-NEXT: retq
|
||||
%X = load <4 x float>, <4 x float>* %P1
|
||||
%tmp = extractelement <4 x float> %X, i32 2
|
||||
ret float %tmp
|
||||
}
|
||||
|
||||
define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
|
||||
; CHECK-LABEL: t3:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl 12(%ecx), %ecx
|
||||
; CHECK-NEXT: movl %ecx, (%eax)
|
||||
; CHECK-NEXT: retl
|
||||
|
||||
%X = load <4 x i32>, <4 x i32>* %P1
|
||||
%tmp = extractelement <4 x i32> %X, i32 3
|
||||
store i32 %tmp, i32* %R
|
||||
ret void
|
||||
; X32-LABEL: t3:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl 12(%ecx), %ecx
|
||||
; X32-NEXT: movl %ecx, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t3:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl 12(%rsi), %eax
|
||||
; X64-NEXT: movl %eax, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
%X = load <4 x i32>, <4 x i32>* %P1
|
||||
%tmp = extractelement <4 x i32> %X, i32 3
|
||||
store i32 %tmp, i32* %R
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @t4(<4 x i32>* %P1) nounwind {
|
||||
; CHECK-LABEL: t4:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl 12(%eax), %eax
|
||||
; CHECK-NEXT: retl
|
||||
|
||||
%X = load <4 x i32>, <4 x i32>* %P1
|
||||
%tmp = extractelement <4 x i32> %X, i32 3
|
||||
ret i32 %tmp
|
||||
; X32-LABEL: t4:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl 12(%eax), %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t4:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl 12(%rdi), %eax
|
||||
; X64-NEXT: retq
|
||||
%X = load <4 x i32>, <4 x i32>* %P1
|
||||
%tmp = extractelement <4 x i32> %X, i32 3
|
||||
ret i32 %tmp
|
||||
}
|
||||
|
|
|
@ -1,74 +1,104 @@
|
|||
; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse4.1 | FileCheck %s
|
||||
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64
|
||||
|
||||
define void @test1(<4 x float>* %F, float* %f) nounwind {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movaps (%ecx), %xmm0
|
||||
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||
; CHECK-NEXT: movss %xmm0, (%eax)
|
||||
; CHECK-NEXT: retl
|
||||
; X32-LABEL: test1:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movaps (%ecx), %xmm0
|
||||
; X32-NEXT: addps %xmm0, %xmm0
|
||||
; X32-NEXT: movss %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test1:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movaps (%rdi), %xmm0
|
||||
; X64-NEXT: addps %xmm0, %xmm0
|
||||
; X64-NEXT: movss %xmm0, (%rsi)
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
|
||||
%tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
|
||||
%tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
|
||||
store float %tmp2, float* %f
|
||||
ret void
|
||||
%tmp = load <4 x float>, <4 x float>* %F
|
||||
%tmp7 = fadd <4 x float> %tmp, %tmp
|
||||
%tmp2 = extractelement <4 x float> %tmp7, i32 0
|
||||
store float %tmp2, float* %f
|
||||
ret void
|
||||
}
|
||||
|
||||
define float @test2(<4 x float>* %F, float* %f) nounwind {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: pushl %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movaps (%eax), %xmm0
|
||||
; CHECK-NEXT: addps %xmm0, %xmm0
|
||||
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; CHECK-NEXT: movss %xmm0, (%esp)
|
||||
; CHECK-NEXT: flds (%esp)
|
||||
; CHECK-NEXT: popl %eax
|
||||
; CHECK-NEXT: retl
|
||||
; X32-LABEL: test2:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: pushl %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movaps (%eax), %xmm0
|
||||
; X32-NEXT: addps %xmm0, %xmm0
|
||||
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; X32-NEXT: movss %xmm0, (%esp)
|
||||
; X32-NEXT: flds (%esp)
|
||||
; X32-NEXT: popl %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test2:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movaps (%rdi), %xmm0
|
||||
; X64-NEXT: addps %xmm0, %xmm0
|
||||
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
|
||||
%tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
|
||||
%tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
|
||||
ret float %tmp2
|
||||
%tmp = load <4 x float>, <4 x float>* %F
|
||||
%tmp7 = fadd <4 x float> %tmp, %tmp
|
||||
%tmp2 = extractelement <4 x float> %tmp7, i32 2
|
||||
ret float %tmp2
|
||||
}
|
||||
|
||||
define void @test3(float* %R, <4 x float>* %P1) nounwind {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movss 12(%ecx), %xmm0
|
||||
; CHECK-NEXT: movss %xmm0, (%eax)
|
||||
; CHECK-NEXT: retl
|
||||
; X32-LABEL: test3:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: movss %xmm0, (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test3:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: movss %xmm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%X = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
|
||||
%tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1]
|
||||
store float %tmp, float* %R
|
||||
ret void
|
||||
%X = load <4 x float>, <4 x float>* %P1
|
||||
%tmp = extractelement <4 x float> %X, i32 3
|
||||
store float %tmp, float* %R
|
||||
ret void
|
||||
}
|
||||
|
||||
define double @test4(double %A) nounwind {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: subl $12, %esp
|
||||
; CHECK-NEXT: calll foo
|
||||
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; CHECK-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
|
||||
; CHECK-NEXT: movsd %xmm0, (%esp)
|
||||
; CHECK-NEXT: fldl (%esp)
|
||||
; CHECK-NEXT: addl $12, %esp
|
||||
; CHECK-NEXT: retl
|
||||
; X32-LABEL: test4:
|
||||
; X32: # BB#0: # %entry
|
||||
; X32-NEXT: subl $12, %esp
|
||||
; X32-NEXT: calll foo
|
||||
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; X32-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-NEXT: movsd %xmm0, (%esp)
|
||||
; X32-NEXT: fldl (%esp)
|
||||
; X32-NEXT: addl $12, %esp
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test4:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: pushq %rax
|
||||
; X64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
||||
; X64-NEXT: callq foo
|
||||
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; X64-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
|
||||
; X64-NEXT: popq %rax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1]
|
||||
%tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1]
|
||||
%tmp3 = fadd double %tmp2, %A ; <double> [#uses=1]
|
||||
ret double %tmp3
|
||||
%tmp1 = call <2 x double> @foo( )
|
||||
%tmp2 = extractelement <2 x double> %tmp1, i32 1
|
||||
%tmp3 = fadd double %tmp2, %A
|
||||
ret double %tmp3
|
||||
}
|
||||
|
||||
declare <2 x double> @foo()
|
||||
|
|
Loading…
Reference in New Issue