[X86][SSE] Regenerated the vec_extract tests.

llvm-svn: 265183
This commit is contained in:
Simon Pilgrim 2016-04-01 20:55:19 +00:00
parent 66b1bb45b5
commit a372a0f295
5 changed files with 431 additions and 180 deletions

View File

@ -1,36 +1,108 @@
; RUN: llc < %s -march=x86-64
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
define <8 x i32> @a(<8 x i32> %a) nounwind {
; SSE-LABEL: a:
; SSE: # BB#0:
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: a:
; AVX1: # BB#0:
; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: a:
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $16, %ymm0, %ymm0
; AVX2-NEXT: retq
%b = trunc <8 x i32> %a to <8 x i16>
%c = sext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
}
define <3 x i32> @b(<3 x i32> %a) nounwind {
; SSE-LABEL: b:
; SSE: # BB#0:
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: b:
; AVX: # BB#0:
; AVX-NEXT: vpslld $16, %xmm0, %xmm0
; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX-NEXT: retq
%b = trunc <3 x i32> %a to <3 x i16>
%c = sext <3 x i16> %b to <3 x i32>
ret <3 x i32> %c
}
define <1 x i32> @c(<1 x i32> %a) nounwind {
; ALL-LABEL: c:
; ALL: # BB#0:
; ALL-NEXT: movswl %di, %eax
; ALL-NEXT: retq
%b = trunc <1 x i32> %a to <1 x i16>
%c = sext <1 x i16> %b to <1 x i32>
ret <1 x i32> %c
}
define <8 x i32> @d(<8 x i32> %a) nounwind {
; SSE-LABEL: d:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: d:
; AVX1: # BB#0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: d:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
%b = trunc <8 x i32> %a to <8 x i16>
%c = zext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
}
define <3 x i32> @e(<3 x i32> %a) nounwind {
; SSE-LABEL: e:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: e:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; AVX-NEXT: retq
%b = trunc <3 x i32> %a to <3 x i16>
%c = zext <3 x i16> %b to <3 x i32>
ret <3 x i32> %c
}
define <1 x i32> @f(<1 x i32> %a) nounwind {
; ALL-LABEL: f:
; ALL: # BB#0:
; ALL-NEXT: movzwl %di, %eax
; ALL-NEXT: retq
%b = trunc <1 x i32> %a to <1 x i16>
%c = zext <1 x i16> %b to <1 x i32>
ret <1 x i32> %c

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; When extracting multiple consecutive elements from a larger
; vector into a smaller one, do it efficiently. We should use
@ -8,11 +9,18 @@
; Extracting the low elements only requires using the right kind of store.
define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
; CHECK-LABEL: low_v8f32_to_v4f32:
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps %xmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: low_v8f32_to_v4f32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovaps %xmm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: low_v8f32_to_v4f32:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ext0 = extractelement <8 x float> %v, i32 0
%ext1 = extractelement <8 x float> %v, i32 1
%ext2 = extractelement <8 x float> %v, i32 2
@ -27,11 +35,18 @@ define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
; Extracting the high elements requires just one AVX instruction.
define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
; CHECK-LABEL: high_v8f32_to_v4f32:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: high_v8f32_to_v4f32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: high_v8f32_to_v4f32:
; X64: # BB#0:
; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ext0 = extractelement <8 x float> %v, i32 4
%ext1 = extractelement <8 x float> %v, i32 5
%ext2 = extractelement <8 x float> %v, i32 6
@ -48,11 +63,18 @@ define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
; if we were actually using the vector in this function and
; have AVX2, we should generate vextracti128 (the int version).
define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
; CHECK-LABEL: high_v8i32_to_v4i32:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: high_v8i32_to_v4i32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: high_v8i32_to_v4i32:
; X64: # BB#0:
; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ext0 = extractelement <8 x i32> %v, i32 4
%ext1 = extractelement <8 x i32> %v, i32 5
%ext2 = extractelement <8 x i32> %v, i32 6
@ -67,11 +89,18 @@ define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
; Make sure that element size doesn't alter the codegen.
define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
; CHECK-LABEL: high_v4f64_to_v2f64:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: high_v4f64_to_v2f64:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vextractf128 $1, %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: high_v4f64_to_v2f64:
; X64: # BB#0:
; X64-NEXT: vextractf128 $1, %ymm0, (%rdi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ext0 = extractelement <4 x double> %v, i32 2
%ext1 = extractelement <4 x double> %v, i32 3
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
@ -84,14 +113,25 @@ define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
; FIXME - Ideally these should just call VMOVD/VMOVQ/VMOVSS/VMOVSD
define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
; CHECK-LABEL: legal_vzmovl_2i32_8i32:
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; CHECK-NEXT: vmovaps %ymm0, (%rsi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: legal_vzmovl_2i32_8i32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X32-NEXT: vxorps %ymm1, %ymm1, %ymm1
; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: legal_vzmovl_2i32_8i32:
; X64: # BB#0:
; X64-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-NEXT: vmovaps %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ld = load <2 x i32>, <2 x i32>* %in, align 8
%ext = extractelement <2 x i32> %ld, i64 0
%ins = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %ext, i64 0
@ -100,14 +140,25 @@ define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
}
define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
; CHECK-LABEL: legal_vzmovl_2i64_4i64:
; CHECK: # BB#0:
; CHECK-NEXT: vmovupd (%rdi), %xmm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; CHECK-NEXT: vmovapd %ymm0, (%rsi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: legal_vzmovl_2i64_4i64:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovupd (%ecx), %xmm0
; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-NEXT: vmovapd %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: legal_vzmovl_2i64_4i64:
; X64: # BB#0:
; X64-NEXT: vmovupd (%rdi), %xmm0
; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-NEXT: vmovapd %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ld = load <2 x i64>, <2 x i64>* %in, align 8
%ext = extractelement <2 x i64> %ld, i64 0
%ins = insertelement <4 x i64> <i64 undef, i64 0, i64 0, i64 0>, i64 %ext, i64 0
@ -116,14 +167,23 @@ define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
}
define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
; CHECK-LABEL: legal_vzmovl_2f32_8f32:
; CHECK: # BB#0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; CHECK-NEXT: vmovaps %ymm0, (%rsi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: legal_vzmovl_2f32_8f32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: legal_vzmovl_2f32_8f32:
; X64: # BB#0:
; X64-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-NEXT: vmovaps %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ld = load <2 x float>, <2 x float>* %in, align 8
%ext = extractelement <2 x float> %ld, i64 0
%ins = insertelement <8 x float> <float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
@ -132,14 +192,25 @@ define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
}
define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
; CHECK-LABEL: legal_vzmovl_2f64_4f64:
; CHECK: # BB#0:
; CHECK-NEXT: vmovupd (%rdi), %xmm0
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; CHECK-NEXT: vmovapd %ymm0, (%rsi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; X32-LABEL: legal_vzmovl_2f64_4f64:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovupd (%ecx), %xmm0
; X32-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-NEXT: vmovapd %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: legal_vzmovl_2f64_4f64:
; X64: # BB#0:
; X64-NEXT: vmovupd (%rdi), %xmm0
; X64-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-NEXT: vmovapd %ymm0, (%rsi)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%ld = load <2 x double>, <2 x double>* %in, align 8
%ext = extractelement <2 x double> %ld, i64 0
%ins = insertelement <4 x double> <double undef, double 0.0, double 0.0, double 0.0>, double %ext, i64 0

View File

@ -1,12 +1,35 @@
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64
define i32 @test0(<1 x i64>* %v4) {
; CHECK-LABEL: test0:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: pshufw $238, (%[[REG:[a-z]+]]), %mm0
; CHECK-NEXT: movd %mm0, %eax
; CHECK-NEXT: addl $32, %eax
; CHECK-NEXT: retq
define i32 @test0(<1 x i64>* %v4) nounwind {
; X32-LABEL: test0:
; X32: # BB#0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $24, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: movl 4(%eax), %eax
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movl %ecx, (%esp)
; X32-NEXT: pshufw $238, (%esp), %mm0 # mm0 = mem[2,3,2,3]
; X32-NEXT: movq %mm0, {{[0-9]+}}(%esp)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: addl $32, %eax
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: test0:
; X64: # BB#0: # %entry
; X64-NEXT: pshufw $238, (%rdi), %mm0 # mm0 = mem[2,3,2,3]
; X64-NEXT: movd %mm0, %eax
; X64-NEXT: addl $32, %eax
; X64-NEXT: retq
entry:
%v5 = load <1 x i64>, <1 x i64>* %v4, align 8
%v12 = bitcast <1 x i64> %v5 to <4 x i16>
@ -21,14 +44,32 @@ entry:
ret i32 %v20
}
define i32 @test1(i32* nocapture readonly %ptr) {
; CHECK-LABEL: test1:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movd (%[[REG]]), %mm0
; CHECK-NEXT: pshufw $232, %mm0, %mm0
; CHECK-NEXT: movd %mm0, %eax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
define i32 @test1(i32* nocapture readonly %ptr) nounwind {
; X32-LABEL: test1:
; X32: # BB#0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movd (%eax), %mm0
; X32-NEXT: pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
; X32-NEXT: movq %mm0, (%esp)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: emms
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # BB#0: # %entry
; X64-NEXT: movd (%rdi), %mm0
; X64-NEXT: pshufw $232, %mm0, %mm0 # mm0 = mm0[0,2,2,3]
; X64-NEXT: movd %mm0, %eax
; X64-NEXT: emms
; X64-NEXT: retq
entry:
%0 = load i32, i32* %ptr, align 4
%1 = insertelement <2 x i32> undef, i32 %0, i32 0
@ -47,13 +88,30 @@ entry:
ret i32 %12
}
define i32 @test2(i32* nocapture readonly %ptr) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: pshufw $232, (%[[REG]]), %mm0
; CHECK-NEXT: movd %mm0, %eax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
define i32 @test2(i32* nocapture readonly %ptr) nounwind {
; X32-LABEL: test2:
; X32: # BB#0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: pshufw $232, (%eax), %mm0 # mm0 = mem[0,2,2,3]
; X32-NEXT: movq %mm0, (%esp)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: emms
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # BB#0: # %entry
; X64-NEXT: pshufw $232, (%rdi), %mm0 # mm0 = mem[0,2,2,3]
; X64-NEXT: movd %mm0, %eax
; X64-NEXT: emms
; X64-NEXT: retq
entry:
%0 = bitcast i32* %ptr to x86_mmx*
%1 = load x86_mmx, x86_mmx* %0, align 8

View File

@ -1,59 +1,79 @@
; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
define void @t1(float* %R, <4 x float>* %P1) nounwind {
; CHECK-LABEL: t1:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movss 12(%ecx), %xmm0
; CHECK-NEXT: movss %xmm0, (%eax)
; CHECK-NEXT: retl
%X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 3
store float %tmp, float* %R
ret void
; X32-LABEL: t1:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # BB#0:
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss %xmm0, (%rdi)
; X64-NEXT: retq
%X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 3
store float %tmp, float* %R
ret void
}
define float @t2(<4 x float>* %P1) nounwind {
; CHECK-LABEL: t2:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: movss %xmm0, (%esp)
; CHECK-NEXT: flds (%esp)
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 2
ret float %tmp
; X32-LABEL: t2:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: movss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # BB#0:
; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: retq
%X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 2
ret float %tmp
}
define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
; CHECK-LABEL: t3:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%ecx), %ecx
; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: retl
%X = load <4 x i32>, <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
store i32 %tmp, i32* %R
ret void
; X32-LABEL: t3:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl 12(%ecx), %ecx
; X32-NEXT: movl %ecx, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: t3:
; X64: # BB#0:
; X64-NEXT: movl 12(%rsi), %eax
; X64-NEXT: movl %eax, (%rdi)
; X64-NEXT: retq
%X = load <4 x i32>, <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
store i32 %tmp, i32* %R
ret void
}
define i32 @t4(<4 x i32>* %P1) nounwind {
; CHECK-LABEL: t4:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 12(%eax), %eax
; CHECK-NEXT: retl
%X = load <4 x i32>, <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
ret i32 %tmp
; X32-LABEL: t4:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl 12(%eax), %eax
; X32-NEXT: retl
;
; X64-LABEL: t4:
; X64: # BB#0:
; X64-NEXT: movl 12(%rdi), %eax
; X64-NEXT: retq
%X = load <4 x i32>, <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
ret i32 %tmp
}

View File

@ -1,74 +1,104 @@
; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse4.1 | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64
define void @test1(<4 x float>* %F, float* %f) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movaps (%ecx), %xmm0
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: movss %xmm0, (%eax)
; CHECK-NEXT: retl
; X32-LABEL: test1:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movaps (%ecx), %xmm0
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: movss %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # BB#0: # %entry
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: movss %xmm0, (%rsi)
; X64-NEXT: retq
entry:
%tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
store float %tmp2, float* %f
ret void
%tmp = load <4 x float>, <4 x float>* %F
%tmp7 = fadd <4 x float> %tmp, %tmp
%tmp2 = extractelement <4 x float> %tmp7, i32 0
store float %tmp2, float* %f
ret void
}
define float @test2(<4 x float>* %F, float* %f) nounwind {
; CHECK-LABEL: test2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movaps (%eax), %xmm0
; CHECK-NEXT: addps %xmm0, %xmm0
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: movss %xmm0, (%esp)
; CHECK-NEXT: flds (%esp)
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
; X32-LABEL: test2:
; X32: # BB#0: # %entry
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps (%eax), %xmm0
; X32-NEXT: addps %xmm0, %xmm0
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: movss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # BB#0: # %entry
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: addps %xmm0, %xmm0
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: retq
entry:
%tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
ret float %tmp2
%tmp = load <4 x float>, <4 x float>* %F
%tmp7 = fadd <4 x float> %tmp, %tmp
%tmp2 = extractelement <4 x float> %tmp7, i32 2
ret float %tmp2
}
define void @test3(float* %R, <4 x float>* %P1) nounwind {
; CHECK-LABEL: test3:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movss 12(%ecx), %xmm0
; CHECK-NEXT: movss %xmm0, (%eax)
; CHECK-NEXT: retl
; X32-LABEL: test3:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test3:
; X64: # BB#0: # %entry
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss %xmm0, (%rdi)
; X64-NEXT: retq
entry:
%X = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1]
store float %tmp, float* %R
ret void
%X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 3
store float %tmp, float* %R
ret void
}
define double @test4(double %A) nounwind {
; CHECK-LABEL: test4:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: calll foo
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movsd %xmm0, (%esp)
; CHECK-NEXT: fldl (%esp)
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
; X32-LABEL: test4:
; X32: # BB#0: # %entry
; X32-NEXT: subl $12, %esp
; X32-NEXT: calll foo
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: movsd %xmm0, (%esp)
; X32-NEXT: fldl (%esp)
; X32-NEXT: addl $12, %esp
; X32-NEXT: retl
;
; X64-LABEL: test4:
; X64: # BB#0: # %entry
; X64-NEXT: pushq %rax
; X64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; X64-NEXT: callq foo
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
; X64-NEXT: popq %rax
; X64-NEXT: retq
entry:
%tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1]
%tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1]
%tmp3 = fadd double %tmp2, %A ; <double> [#uses=1]
ret double %tmp3
%tmp1 = call <2 x double> @foo( )
%tmp2 = extractelement <2 x double> %tmp1, i32 1
%tmp3 = fadd double %tmp2, %A
ret double %tmp3
}
declare <2 x double> @foo()