diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 737bfff1991c..3f8916df0ee9 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-pc-linux -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE2
-; RUN: llc -mtriple=x86_64-pc-linux -mattr=+sse4.2 < %s | FileCheck %s --check-prefix=SSE42
-; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
-; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
 
 define void @v3i64(<2 x i64> %a, <2 x i64> %b, <3 x i64>* %p) {
 ; SSE2-LABEL: v3i64:
@@ -20,12 +20,12 @@ define void @v3i64(<2 x i64> %a, <2 x i64> %b, <3 x i64>* %p) {
 ; SSE42-NEXT: movdqa %xmm0, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: v3i64:
-; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; AVX-NEXT: vpextrq $1, %xmm0, 16(%rdi)
-; AVX-NEXT: vmovapd %xmm1, (%rdi)
-; AVX-NEXT: retq
+; AVX1-LABEL: v3i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpextrq $1, %xmm0, 16(%rdi)
+; AVX1-NEXT: vmovapd %xmm1, (%rdi)
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v3i64:
 ; AVX2: # BB#0:
@@ -41,26 +41,19 @@ define void @v3i64(<2 x i64> %a, <2 x i64> %b, <3 x i64>* %p) {
   ret void
 }
 define void @v3f64(<2 x double> %a, <2 x double> %b, <3 x double>* %p) {
-; SSE2-LABEL: v3f64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movhpd %xmm0, 16(%rdi)
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: movapd %xmm0, (%rdi)
-; SSE2-NEXT: retq
+; SSE-LABEL: v3f64:
+; SSE: # BB#0:
+; SSE-NEXT: movhpd %xmm0, 16(%rdi)
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movapd %xmm0, (%rdi)
+; SSE-NEXT: retq
 ;
-; SSE42-LABEL: v3f64:
-; SSE42: # BB#0:
-; SSE42-NEXT: movhpd %xmm0, 16(%rdi)
-; SSE42-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE42-NEXT: movapd %xmm0, (%rdi)
-; SSE42-NEXT: retq
-;
-; AVX-LABEL: v3f64:
-; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; AVX-NEXT: vmovhpd %xmm0, 16(%rdi)
-; AVX-NEXT: vmovapd %xmm1, (%rdi)
-; AVX-NEXT: retq
+; AVX1-LABEL: v3f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vmovhpd %xmm0, 16(%rdi)
+; AVX1-NEXT: vmovapd %xmm1, (%rdi)
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v3f64:
 ; AVX2: # BB#0:
@@ -94,13 +87,13 @@ define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) {
 ; SSE42-NEXT: movq %xmm1, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: v3i32:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; AVX-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; AVX-NEXT: vmovq %xmm1, (%rdi)
-; AVX-NEXT: retq
+; AVX1-LABEL: v3i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpextrd $2, %xmm0, 8(%rdi)
+; AVX1-NEXT: vmovq %xmm1, (%rdi)
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v3i32:
 ; AVX2: # BB#0:
@@ -148,17 +141,6 @@ define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) {
 ; AVX-NEXT: vpextrw $6, %xmm0, 8(%rdi)
 ; AVX-NEXT: vmovq %xmm1, (%rdi)
 ; AVX-NEXT: retq
-;
-; AVX2-LABEL: v5i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX2-NEXT: vpextrw $6, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovq %xmm1, (%rdi)
-; AVX2-NEXT: retq
   %r = shufflevector <4 x i16> %a, <4 x i16> %b, <5 x i32>
   store <5 x i16> %r, <5 x i16>* %p
   ret void
@@ -183,13 +165,13 @@ define void @v5i32(<4 x i32> %a, <4 x i32> %b, <5 x i32>* %p) {
 ; SSE42-NEXT: movdqa %xmm2, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: v5i32:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
-; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX-NEXT: vpextrd $3, %xmm0, 16(%rdi)
-; AVX-NEXT: vmovaps %xmm1, (%rdi)
-; AVX-NEXT: retq
+; AVX1-LABEL: v5i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX1-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; AVX1-NEXT: vmovaps %xmm1, (%rdi)
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v5i32:
 ; AVX2: # BB#0:
@@ -225,13 +207,13 @@ define void @v5f32(<4 x float> %a, <4 x float> %b, <5 x float>* %p) {
 ; SSE42-NEXT: movaps %xmm0, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: v5f32:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
-; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX-NEXT: vextractps $3, %xmm0, 16(%rdi)
-; AVX-NEXT: vmovaps %xmm1, (%rdi)
-; AVX-NEXT: retq
+; AVX1-LABEL: v5f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi)
+; AVX1-NEXT: vmovaps %xmm1, (%rdi)
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v5f32:
 ; AVX2: # BB#0:
@@ -298,18 +280,6 @@ define void @v7i8(<4 x i8> %a, <4 x i8> %b, <7 x i8>* %p) {
 ; AVX-NEXT: vpextrw $4, %xmm0, 4(%rdi)
 ; AVX-NEXT: vmovd %xmm2, (%rdi)
 ; AVX-NEXT: retq
-;
-; AVX2-LABEL: v7i8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[12],zero,xmm0[4],zero,zero,xmm0[u,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,xmm1[8],zero,xmm1[8],zero,xmm1[12,0,u,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT: vpextrb $0, %xmm1, 6(%rdi)
-; AVX2-NEXT: vpextrw $4, %xmm0, 4(%rdi)
-; AVX2-NEXT: vmovd %xmm2, (%rdi)
-; AVX2-NEXT: retq
   %r = shufflevector <4 x i8> %a, <4 x i8> %b, <7 x i32>
   store <7 x i8> %r, <7 x i8>* %p
   ret void
@@ -352,16 +322,6 @@ define void @v7i16(<4 x i16> %a, <4 x i16> %b, <7 x i16>* %p) {
 ; AVX-NEXT: vpextrd $2, %xmm0, 8(%rdi)
 ; AVX-NEXT: vmovq %xmm0, (%rdi)
 ; AVX-NEXT: retq
-;
-; AVX2-LABEL: v7i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,1,3]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[8,9,8,9,4,5,8,9,0,1,12,13,0,1,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5,6,7]
-; AVX2-NEXT: vpextrw $0, %xmm1, 12(%rdi)
-; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovq %xmm0, (%rdi)
-; AVX2-NEXT: retq
   %r = shufflevector <4 x i16> %a, <4 x i16> %b, <7 x i32>
   store <7 x i16> %r, <7 x i16>* %p
   ret void
@@ -393,16 +353,16 @@ define void @v7i32(<4 x i32> %a, <4 x i32> %b, <7 x i32>* %p) {
 ; SSE42-NEXT: movdqa %xmm0, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: v7i32:
-; AVX: # BB#0:
-; AVX-NEXT: vblendps {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3]
-; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,2]
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
-; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,3]
-; AVX-NEXT: vmovd %xmm1, 24(%rdi)
-; AVX-NEXT: vmovlps %xmm0, 16(%rdi)
-; AVX-NEXT: vmovaps %xmm2, (%rdi)
-; AVX-NEXT: retq
+; AVX1-LABEL: v7i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,2]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,3]
+; AVX1-NEXT: vmovd %xmm1, 24(%rdi)
+; AVX1-NEXT: vmovlps %xmm0, 16(%rdi)
+; AVX1-NEXT: vmovaps %xmm2, (%rdi)
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v7i32:
 ; AVX2: # BB#0:
@@ -461,15 +421,6 @@ define void @v12i8(<8 x i8> %a, <8 x i8> %b, <12 x i8>* %p) {
 ; AVX-NEXT: vpextrd $2, %xmm0, 8(%rdi)
 ; AVX-NEXT: vmovq %xmm0, (%rdi)
 ; AVX-NEXT: retq
-;
-; AVX2-LABEL: v12i8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,xmm1[0],zero,zero,xmm1[2],zero,zero,xmm1[4],zero,zero,xmm1[6,u,u,u,u]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8],zero,xmm0[2,10],zero,xmm0[4,12],zero,xmm0[6,14],zero,xmm0[u,u,u,u]
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovq %xmm0, (%rdi)
-; AVX2-NEXT: retq
   %r = shufflevector <8 x i8> %a, <8 x i8> %b, <12 x i32>
   store <12 x i8> %r, <12 x i8>* %p
   ret void
@@ -511,18 +462,18 @@ define void @v12i16(<8 x i16> %a, <8 x i16> %b, <12 x i16>* %p) {
 ; SSE42-NEXT: movq %xmm3, 16(%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: v12i16:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
-; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,3,1,3,4,5,6,7]
-; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3],xmm3[4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,2,3,10,11,10,11,4,5,12,13]
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
-; AVX-NEXT: vmovdqa %xmm0, (%rdi)
-; AVX-NEXT: vmovq %xmm2, 16(%rdi)
-; AVX-NEXT: retq
+; AVX1-LABEL: v12i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,1,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,3,1,3,4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,2,3,10,11,10,11,4,5,12,13]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
+; AVX1-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX1-NEXT: vmovq %xmm2, 16(%rdi)
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v12i16:
 ; AVX2: # BB#0:
@@ -594,23 +545,23 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, <12 x i32>* %p) {
 ; SSE42-NEXT: movdqa %xmm0, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX-LABEL: v12i32:
-; AVX: # BB#0:
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
-; AVX-NEXT: vmovsldup {{.*#+}} ymm2 = ymm2[0,0,2,2,4,4,6,6]
-; AVX-NEXT: vpermilps {{.*#+}} ymm3 = ymm0[0,u,u,1,5,u,u,6]
-; AVX-NEXT: vblendps {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2,3,4,5],ymm2[6],ymm3[7]
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = xmm1[0,0]
-; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
-; AVX-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
-; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,1]
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
-; AVX-NEXT: vmovaps %xmm0, 32(%rdi)
-; AVX-NEXT: vmovaps %ymm2, (%rdi)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: v12i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX1-NEXT: vmovsldup {{.*#+}} ymm2 = ymm2[0,0,2,2,4,4,6,6]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm3 = ymm0[0,u,u,1,5,u,u,6]
+; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2,3,4,5],ymm2[6],ymm3[7]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = xmm1[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
+; AVX1-NEXT: vmovaps %xmm0, 32(%rdi)
+; AVX1-NEXT: vmovaps %ymm2, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v12i32:
 ; AVX2: # BB#0:
@@ -687,19 +638,6 @@ define void @pr29025(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <12 x i8> *%p) {
 ; AVX-NEXT: vpextrd $2, %xmm0, 8(%rdi)
 ; AVX-NEXT: vmovq %xmm0, (%rdi)
 ; AVX-NEXT: retq
-;
-; AVX2-LABEL: pr29025:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,1,2,10,3,4,12,5,6,14,7,u,u,u,u]
-; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovq %xmm0, (%rdi)
-; AVX2-NEXT: retq
   %s1 = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32>
   %s2 = shufflevector <4 x i8> %c, <4 x i8> undef, <8 x i32>
   %r = shufflevector <8 x i8> %s1, <8 x i8> %s2, <12 x i32>