[X86][SSE] Cleanup shuffle combining test check prefixes

Share prefixes whenever possible, use X86 instead of X32.

llvm-svn: 350722
Simon Pilgrim 2019-01-09 13:46:14 +00:00
parent 1e0b5c719b
commit fcabfc8666
9 changed files with 815 additions and 1339 deletions
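To illustrate the pattern being applied (a minimal sketch on a hypothetical identity test; the @example function below is illustrative and not part of this commit): instead of duplicating every assertion under target-specific prefixes, each RUN line passes --check-prefixes with a shared CHECK prefix first, so output that is identical on both targets is asserted once:

; Illustrative example, not taken from this commit.
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64

define <4 x float> @example(<4 x float> %a0) {
; CHECK-LABEL: example:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  ret <4 x float> %a0
}

The shared block works because the FileCheck regex ret{{[l|q]}} matches both the 32-bit retl and the 64-bit retq; codegen that genuinely differs between targets (e.g. the extra movl argument load on i686) stays under the X86/X64 prefixes, as in the diffs below.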


@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX1
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX2
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX2,X86-AVX2
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX2,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512,X64-AVX512
;
; Combine tests involving AVX target shuffles
@@ -23,37 +23,28 @@ declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8)
define <4 x float> @combine_vpermilvar_4f32_identity(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_identity:
; X32: # %bb.0:
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_identity:
; X64: # %bb.0:
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
ret <4 x float> %2
}
define <4 x float> @combine_vpermilvar_4f32_movddup(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_movddup:
; X32: # %bb.0:
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_movddup:
; X64: # %bb.0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_movddup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
ret <4 x float> %1
}
define <4 x float> @combine_vpermilvar_4f32_movddup_load(<4 x float> *%a0) {
; X32-LABEL: combine_vpermilvar_4f32_movddup_load:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT: retl
; X86-LABEL: combine_vpermilvar_4f32_movddup_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_movddup_load:
; X64: # %bb.0:
@@ -65,119 +56,75 @@ define <4 x float> @combine_vpermilvar_4f32_movddup_load(<4 x float> *%a0) {
}
define <4 x float> @combine_vpermilvar_4f32_movshdup(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_movshdup:
; X32: # %bb.0:
; X32-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_movshdup:
; X64: # %bb.0:
; X64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_movshdup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 undef, i32 1, i32 3, i32 3>)
ret <4 x float> %1
}
define <4 x float> @combine_vpermilvar_4f32_movsldup(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_movsldup:
; X32: # %bb.0:
; X32-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_movsldup:
; X64: # %bb.0:
; X64-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_movsldup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)
ret <4 x float> %1
}
define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_unpckh:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_unpckh:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_unpckh:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
ret <4 x float> %1
}
define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_unpckl:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_unpckl:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_unpckl:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
ret <4 x float> %1
}
define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
; X32-LABEL: combine_vpermilvar_8f32_identity:
; X32: # %bb.0:
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_8f32_identity:
; X64: # %bb.0:
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_8f32_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 undef>)
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 1>)
ret <8 x float> %2
}
define <8 x float> @combine_vpermilvar_8f32_10326u4u(<8 x float> %a0) {
; X32-LABEL: combine_vpermilvar_8f32_10326u4u:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_8f32_10326u4u:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_8f32_10326u4u:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 0, i32 1, i32 2, i32 undef>)
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 undef>)
ret <8 x float> %2
}
define <8 x float> @combine_vpermilvar_vperm2f128_8f32(<8 x float> %a0) {
; X32-AVX1-LABEL: combine_vpermilvar_vperm2f128_8f32:
; X32-AVX1: # %bb.0:
; X32-AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X32-AVX1-NEXT: retl
; AVX1-LABEL: combine_vpermilvar_vperm2f128_8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: ret{{[l|q]}}
;
; X32-AVX2-LABEL: combine_vpermilvar_vperm2f128_8f32:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X32-AVX2-NEXT: retl
; AVX2-LABEL: combine_vpermilvar_vperm2f128_8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: ret{{[l|q]}}
;
; X32-AVX512-LABEL: combine_vpermilvar_vperm2f128_8f32:
; X32-AVX512: # %bb.0:
; X32-AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X32-AVX512-NEXT: retl
;
; X64-AVX1-LABEL: combine_vpermilvar_vperm2f128_8f32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: combine_vpermilvar_vperm2f128_8f32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: combine_vpermilvar_vperm2f128_8f32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-AVX512-NEXT: retq
; AVX512-LABEL: combine_vpermilvar_vperm2f128_8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
%2 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
@@ -185,15 +132,10 @@ define <8 x float> @combine_vpermilvar_vperm2f128_8f32(<8 x float> %a0) {
}
define <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) {
; X32-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
; X32: # %bb.0:
; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
; X64: # %bb.0:
; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
%2 = shufflevector <8 x float> %1, <8 x float> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
@@ -201,19 +143,12 @@ define <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) {
}
define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) {
; X32-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X32: # %bb.0:
; X32-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X32-NEXT: vmovapd %xmm0, %xmm0
; X32-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X32-NEXT: retl
;
; X64-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X64: # %bb.0:
; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-NEXT: vmovapd %xmm0, %xmm0
; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-NEXT: retq
; CHECK-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; CHECK-NEXT: vmovapd %xmm0, %xmm0
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
%2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%3 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %2, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
@@ -221,24 +156,19 @@ define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0
}
define <8 x float> @combine_vpermilvar_8f32_movddup(<8 x float> %a0) {
; X32-LABEL: combine_vpermilvar_8f32_movddup:
; X32: # %bb.0:
; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_8f32_movddup:
; X64: # %bb.0:
; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_8f32_movddup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
ret <8 x float> %1
}
define <8 x float> @combine_vpermilvar_8f32_movddup_load(<8 x float> *%a0) {
; X32-LABEL: combine_vpermilvar_8f32_movddup_load:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; X32-NEXT: retl
; X86-LABEL: combine_vpermilvar_8f32_movddup_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_8f32_movddup_load:
; X64: # %bb.0:
@@ -250,97 +180,64 @@ define <8 x float> @combine_vpermilvar_8f32_movddup_load(<8 x float> *%a0) {
}
define <8 x float> @combine_vpermilvar_8f32_movshdup(<8 x float> %a0) {
; X32-LABEL: combine_vpermilvar_8f32_movshdup:
; X32: # %bb.0:
; X32-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_8f32_movshdup:
; X64: # %bb.0:
; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_8f32_movshdup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 5, i32 7, i32 7>)
ret <8 x float> %1
}
define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
; X32-LABEL: combine_vpermilvar_8f32_movsldup:
; X32: # %bb.0:
; X32-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_8f32_movsldup:
; X64: # %bb.0:
; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_8f32_movsldup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>)
ret <8 x float> %1
}
define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
; X32-LABEL: combine_vpermilvar_2f64_identity:
; X32: # %bb.0:
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_2f64_identity:
; X64: # %bb.0:
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_2f64_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 2, i64 0>)
%2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> <i64 2, i64 0>)
ret <2 x double> %2
}
define <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
; X32-LABEL: combine_vpermilvar_2f64_movddup:
; X32: # %bb.0:
; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_2f64_movddup:
; X64: # %bb.0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_2f64_movddup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
ret <2 x double> %1
}
define <4 x double> @combine_vpermilvar_4f64_identity(<4 x double> %a0) {
; X32-LABEL: combine_vpermilvar_4f64_identity:
; X32: # %bb.0:
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f64_identity:
; X64: # %bb.0:
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f64_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
%2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %1, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
ret <4 x double> %2
}
define <4 x double> @combine_vpermilvar_4f64_movddup(<4 x double> %a0) {
; X32-LABEL: combine_vpermilvar_4f64_movddup:
; X32: # %bb.0:
; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f64_movddup:
; X64: # %bb.0:
; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f64_movddup:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
ret <4 x double> %1
}
define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_4stage:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_4stage:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_4stage:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)
%3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)
@@ -349,15 +246,10 @@ define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
}
define <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {
; X32-LABEL: combine_vpermilvar_8f32_4stage:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_8f32_4stage:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_8f32_4stage:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)
%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)
@@ -366,119 +258,94 @@ define <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {
}
define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
; X32-LABEL: combine_vpermilvar_4f32_as_insertps:
; X32: # %bb.0:
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_4f32_as_insertps:
; X64: # %bb.0:
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermilvar_4f32_as_insertps:
; CHECK: # %bb.0:
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
%2 = shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 1, i32 4>
ret <4 x float> %2
}
define <2 x double> @constant_fold_vpermilvar_pd() {
; X32-LABEL: constant_fold_vpermilvar_pd:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2.0E+0,1.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_pd:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [2.0E+0,1.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermilvar_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [2.0E+0,1.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> <double 1.0, double 2.0>, <2 x i64> <i64 2, i64 0>)
ret <2 x double> %1
}
define <4 x double> @constant_fold_vpermilvar_pd_256() {
; X32-LABEL: constant_fold_vpermilvar_pd_256:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [2.0E+0,1.0E+0,3.0E+0,4.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [2.0E+0,1.0E+0,3.0E+0,4.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermilvar_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [2.0E+0,1.0E+0,3.0E+0,4.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
ret <4 x double> %1
}
define <4 x float> @constant_fold_vpermilvar_ps() {
; X32-LABEL: constant_fold_vpermilvar_ps:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [4.0E+0,1.0E+0,3.0E+0,2.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_ps:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [4.0E+0,1.0E+0,3.0E+0,2.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermilvar_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [4.0E+0,1.0E+0,3.0E+0,2.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x i32> <i32 3, i32 0, i32 2, i32 1>)
ret <4 x float> %1
}
define <8 x float> @constant_fold_vpermilvar_ps_256() {
; X32-LABEL: constant_fold_vpermilvar_ps_256:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [1.0E+0,1.0E+0,3.0E+0,2.0E+0,5.0E+0,6.0E+0,6.0E+0,6.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [1.0E+0,1.0E+0,3.0E+0,2.0E+0,5.0E+0,6.0E+0,6.0E+0,6.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermilvar_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [1.0E+0,1.0E+0,3.0E+0,2.0E+0,5.0E+0,6.0E+0,6.0E+0,6.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 1>)
ret <8 x float> %1
}
define void @PR39483() {
; X32-AVX1-LABEL: PR39483:
; X32-AVX1: # %bb.0: # %entry
; X32-AVX1-NEXT: vmovups 32, %ymm0
; X32-AVX1-NEXT: vmovups 64, %xmm1
; X32-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,3]
; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; X32-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X32-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
; X32-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,0,3]
; X32-AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X32-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
; X32-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX1-NEXT: vmulps %ymm1, %ymm0, %ymm0
; X32-AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovups %ymm0, (%eax)
; X86-AVX1-LABEL: PR39483:
; X86-AVX1: # %bb.0: # %entry
; X86-AVX1-NEXT: vmovups 32, %ymm0
; X86-AVX1-NEXT: vmovups 64, %xmm1
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,3]
; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; X86-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,0,3]
; X86-AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vmulps %ymm1, %ymm0, %ymm0
; X86-AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X86-AVX1-NEXT: vmovups %ymm0, (%eax)
;
; X32-AVX2-LABEL: PR39483:
; X32-AVX2: # %bb.0: # %entry
; X32-AVX2-NEXT: vmovups 32, %ymm0
; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
; X32-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
; X32-AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X32-AVX2-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,1,0,3,4,5,4,7]
; X32-AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vmovups %ymm0, (%eax)
; X86-AVX2-LABEL: PR39483:
; X86-AVX2: # %bb.0: # %entry
; X86-AVX2-NEXT: vmovups 32, %ymm0
; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3,4],mem[5],ymm0[6,7]
; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <2,5,0,3,6,u,u,u>
; X86-AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,1,0,3,4,5,4,7]
; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
; X86-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vmovups %ymm0, (%eax)
;
; X32-AVX512-LABEL: PR39483:
; X32-AVX512: # %bb.0: # %entry
; X32-AVX512-NEXT: vmovups 0, %zmm0
; X32-AVX512-NEXT: vmovups 64, %ymm1
; X32-AVX512-NEXT: vmovaps {{.*#+}} zmm2 = <2,5,8,11,14,17,20,23,u,u,u,u,u,u,u,u>
; X32-AVX512-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2
; X32-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX512-NEXT: vmulps %ymm0, %ymm2, %ymm1
; X32-AVX512-NEXT: vaddps %ymm0, %ymm1, %ymm0
; X32-AVX512-NEXT: vmovups %ymm0, (%eax)
; X86-AVX512-LABEL: PR39483:
; X86-AVX512: # %bb.0: # %entry
; X86-AVX512-NEXT: vmovups 0, %zmm0
; X86-AVX512-NEXT: vmovups 64, %ymm1
; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm2 = <2,5,8,11,14,17,20,23,u,u,u,u,u,u,u,u>
; X86-AVX512-NEXT: vpermi2ps %zmm1, %zmm0, %zmm2
; X86-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-AVX512-NEXT: vmulps %ymm0, %ymm2, %ymm1
; X86-AVX512-NEXT: vaddps %ymm0, %ymm1, %ymm0
; X86-AVX512-NEXT: vmovups %ymm0, (%eax)
;
; X64-AVX1-LABEL: PR39483:
; X64-AVX1: # %bb.0: # %entry

File diff suppressed because it is too large


@@ -1,31 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
define <16 x i16> @combine_vpermt2var_16i16_identity(<16 x i16> %x0, <16 x i16> %x1) {
; X32-LABEL: combine_vpermt2var_16i16_identity:
; X32: # %bb.0:
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i16_identity:
; X64: # %bb.0:
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermt2var_16i16_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %x0, <16 x i16> %x1, i16 -1)
%res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 15, i16 30, i16 13, i16 28, i16 11, i16 26, i16 9, i16 24, i16 7, i16 22, i16 5, i16 20, i16 3, i16 18, i16 1, i16 16>, <16 x i16> %res0, <16 x i16> %res0, i16 -1)
ret <16 x i16> %res1
}
define <16 x i16> @combine_vpermt2var_16i16_identity_mask(<16 x i16> %x0, <16 x i16> %x1, i16 %m) {
; X32-LABEL: combine_vpermt2var_16i16_identity_mask:
; X32: # %bb.0:
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpermi2w %ymm0, %ymm0, %ymm1 {%k1} {z}
; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X32-NEXT: vpermi2w %ymm1, %ymm1, %ymm0 {%k1} {z}
; X32-NEXT: retl
; X86-LABEL: combine_vpermt2var_16i16_identity_mask:
; X86: # %bb.0:
; X86-NEXT: vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermi2w %ymm0, %ymm0, %ymm1 {%k1} {z}
; X86-NEXT: vmovdqa {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X86-NEXT: vpermi2w %ymm1, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i16_identity_mask:
; X64: # %bb.0:
@@ -41,63 +37,41 @@ define <16 x i16> @combine_vpermt2var_16i16_identity_mask(<16 x i16> %x0, <16 x
}
define <16 x i16> @combine_vpermi2var_16i16_as_permw(<16 x i16> %x0, <16 x i16> %x1) {
; X32-LABEL: combine_vpermi2var_16i16_as_permw:
; X32: # %bb.0:
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; X32-NEXT: vpermw %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_16i16_as_permw:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermi2var_16i16_as_permw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %x1, i16 -1)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %res0, <16 x i16> <i16 0, i16 15, i16 1, i16 14, i16 2, i16 13, i16 3, i16 12, i16 4, i16 11, i16 5, i16 10, i16 6, i16 9, i16 7, i16 8>, <16 x i16> %res0, i16 -1)
ret <16 x i16> %res1
}
define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_vperm2(<16 x i16> %x0, <16 x i16> %x1) {
; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_vperm2:
; X32: # %bb.0:
; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X32-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_vperm2:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X64-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermt2var_vpermi2var_16i16_as_vperm2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; CHECK-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> <i16 0, i16 31, i16 2, i16 29, i16 4, i16 27, i16 6, i16 25, i16 8, i16 23, i16 10, i16 21, i16 12, i16 19, i16 14, i16 17>, <16 x i16> %x1, i16 -1)
%res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 0, i16 17, i16 2, i16 18, i16 4, i16 19, i16 6, i16 21, i16 8, i16 23, i16 10, i16 25, i16 12, i16 27, i16 14, i16 29>, <16 x i16> %res0, <16 x i16> %res0, i16 -1)
ret <16 x i16> %res1
}
define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpckhwd(<16 x i16> %a0, <16 x i16> %a1) {
; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpckhwd:
; X32: # %bb.0:
; X32-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpckhwd:
; X64: # %bb.0:
; X64-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpckhwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %a0, <16 x i16> <i16 20, i16 4, i16 21, i16 5, i16 22, i16 6, i16 23, i16 7, i16 28, i16 12, i16 29, i16 13, i16 30, i16 14, i16 31, i16 15>, <16 x i16> %a1, i16 -1)
ret <16 x i16> %res0
}
define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpcklwd(<16 x i16> %a0, <16 x i16> %a1) {
; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpcklwd:
; X32: # %bb.0:
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpcklwd:
; X64: # %bb.0:
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpcklwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 0, i16 16, i16 1, i16 17, i16 2, i16 18, i16 3, i16 19, i16 8, i16 24, i16 9, i16 25, i16 10, i16 26, i16 11, i16 27>, <16 x i16> %a0, <16 x i16> %a1, i16 -1)
ret <16 x i16> %res0
}


@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
@@ -19,26 +19,22 @@ declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
define <16 x i8> @combine_vpermt2var_16i8_identity(<16 x i8> %x0, <16 x i8> %x1) {
; X32-LABEL: combine_vpermt2var_16i8_identity:
; X32: # %bb.0:
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i8_identity:
; X64: # %bb.0:
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermt2var_16i8_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %x0, <16 x i8> %x1, i16 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 15, i8 30, i8 13, i8 28, i8 11, i8 26, i8 9, i8 24, i8 7, i8 22, i8 5, i8 20, i8 3, i8 18, i8 1, i8 16>, <16 x i8> %res0, <16 x i8> %res0, i16 -1)
ret <16 x i8> %res1
}
define <16 x i8> @combine_vpermt2var_16i8_identity_mask(<16 x i8> %x0, <16 x i8> %x1, i16 %m) {
; X32-LABEL: combine_vpermt2var_16i8_identity_mask:
; X32: # %bb.0:
; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpermi2b %xmm0, %xmm0, %xmm1 {%k1} {z}
; X32-NEXT: vmovdqa {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X32-NEXT: vpermi2b %xmm1, %xmm1, %xmm0 {%k1} {z}
; X32-NEXT: retl
; X86-LABEL: combine_vpermt2var_16i8_identity_mask:
; X86: # %bb.0:
; X86-NEXT: vmovdqa {{.*#+}} xmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermi2b %xmm0, %xmm0, %xmm1 {%k1} {z}
; X86-NEXT: vmovdqa {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X86-NEXT: vpermi2b %xmm1, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i8_identity_mask:
; X64: # %bb.0:
@@ -54,100 +50,63 @@ define <16 x i8> @combine_vpermt2var_16i8_identity_mask(<16 x i8> %x0, <16 x i8>
}
define <16 x i8> @combine_vpermi2var_16i8_as_vpshufb(<16 x i8> %x0, <16 x i8> %x1) {
; X32-LABEL: combine_vpermi2var_16i8_as_vpshufb:
; X32: # %bb.0:
; X32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_16i8_as_vpshufb:
; X64: # %bb.0:
; X64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermi2var_16i8_as_vpshufb:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %x1, i16 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 15, i8 1, i8 14, i8 2, i8 13, i8 3, i8 12, i8 4, i8 11, i8 5, i8 10, i8 6, i8 9, i8 7, i8 8>, <16 x i8> %res0, i16 -1)
ret <16 x i8> %res1
}
define <32 x i8> @combine_vpermi2var_32i8_as_vpermb(<32 x i8> %x0, <32 x i8> %x1) {
; X32-LABEL: combine_vpermi2var_32i8_as_vpermb:
; X32: # %bb.0:
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X32-NEXT: vpermb %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_32i8_as_vpermb:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X64-NEXT: vpermb %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermi2var_32i8_as_vpermb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %res0, <32 x i8> <i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22, i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22>, <32 x i8> %res0, i32 -1)
ret <32 x i8> %res1
}
define <64 x i8> @combine_vpermi2var_64i8_as_vpermb(<64 x i8> %x0, <64 x i8> %x1) {
; X32-LABEL: combine_vpermi2var_64i8_as_vpermb:
; X32: # %bb.0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X32-NEXT: vpermb %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_64i8_as_vpermb:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X64-NEXT: vpermb %zmm0, %zmm1, %zmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermi2var_64i8_as_vpermb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <64 x i8> %x0, <64 x i8> %x1, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %res0, <64 x i8> <i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22, i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22, i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22, i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22>, <64 x i8> %res0, i64 -1)
ret <64 x i8> %res1
}
define <16 x i8> @combine_vpermt2var_vpermi2var_16i8_as_vperm2(<16 x i8> %x0, <16 x i8> %x1) {
; X32-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2:
; X32: # %bb.0:
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2
; X32-NEXT: vmovdqa {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2
; X64-NEXT: vmovdqa {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
; CHECK-NEXT: vpermi2b %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
; CHECK-NEXT: vpermi2b %xmm2, %xmm2, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> <i8 0, i8 31, i8 2, i8 29, i8 4, i8 27, i8 6, i8 25, i8 8, i8 23, i8 10, i8 21, i8 12, i8 19, i8 14, i8 17>, <16 x i8> %x1, i16 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 0, i8 17, i8 2, i8 18, i8 4, i8 19, i8 6, i8 21, i8 8, i8 23, i8 10, i8 25, i8 12, i8 27, i8 14, i8 29>, <16 x i8> %res0, <16 x i8> %res0, i16 -1)
ret <16 x i8> %res1
}
define <32 x i8> @combine_vpermi2var_32i8_as_vperm2(<32 x i8> %x0, <32 x i8> %x1) {
; X32-LABEL: combine_vpermi2var_32i8_as_vperm2:
; X32: # %bb.0:
; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X32-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_32i8_as_vperm2:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X64-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermi2var_32i8_as_vperm2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %res0, <32 x i8> <i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22, i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22>, <32 x i8> %x1, i32 -1)
ret <32 x i8> %res1
}
define <64 x i8> @combine_vpermi2var_64i8_as_vperm2(<64 x i8> %x0, <64 x i8> %x1) {
; X32-LABEL: combine_vpermi2var_64i8_as_vperm2:
; X32: # %bb.0:
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,80,1,70,2,54,3,49,4,36,5,23,6,18,7,5,0,90,1,100,2,110,3,120,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X32-NEXT: vpermt2b %zmm1, %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_64i8_as_vperm2:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,80,1,70,2,54,3,49,4,36,5,23,6,18,7,5,0,90,1,100,2,110,3,120,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X64-NEXT: vpermt2b %zmm1, %zmm2, %zmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermi2var_64i8_as_vperm2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,80,1,70,2,54,3,49,4,36,5,23,6,18,7,5,0,90,1,100,2,110,3,120,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; CHECK-NEXT: vpermt2b %zmm1, %zmm2, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <64 x i8> %x0, <64 x i8> %x1, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %res0, <64 x i8> <i8 0, i8 80, i8 2, i8 70, i8 4, i8 60, i8 6, i8 50, i8 8, i8 40, i8 10, i8 30, i8 12, i8 20, i8 14, i8 10, i8 0, i8 90, i8 2, i8 100, i8 4, i8 110, i8 6, i8 120, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22, i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22, i8 0, i8 32, i8 2, i8 30, i8 4, i8 28, i8 6, i8 26, i8 8, i8 28, i8 10, i8 26, i8 12, i8 24, i8 14, i8 22>, <64 x i8> %x1, i64 -1)
ret <64 x i8> %res1


@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F
;
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512F
; Combine tests involving SSE41 target shuffles (BLEND,INSERTPS,MOVZX)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)


@@ -1,28 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,+sse4a| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2,+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,+sse4a| FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,+sse4a | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
;
; Combine tests involving SSE4A target shuffles (EXTRQI,INSERTQI)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
; ALL-LABEL: combine_extrqi_pshufb_16i8:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
; CHECK-LABEL: combine_extrqi_pshufb_16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; CHECK-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
ret <16 x i8> %2
}
define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
; ALL-LABEL: combine_extrqi_pshufb_8i16:
; ALL: # %bb.0:
; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
; CHECK-LABEL: combine_extrqi_pshufb_8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; CHECK-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = bitcast <8 x i16> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
@@ -75,10 +75,10 @@ define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
}
define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: combine_pshufb_insertqi_pshufb:
; ALL: # %bb.0:
; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshufb_insertqi_pshufb:
; CHECK: # %bb.0:
; CHECK-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
; CHECK-NEXT: retq
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
%2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)


@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512F
;
; Combine tests involving SSE3/SSSE3 target shuffles (MOVDDUP, MOVSHDUP, MOVSLDUP, PSHUFB)
@@ -457,9 +457,9 @@ define <16 x i8> @combine_pshufb_as_unary_unpckhwd(<16 x i8> %a0) {
}
define <8 x i16> @combine_pshufb_as_unpacklo_undef(<16 x i8> %a0) {
; ALL-LABEL: combine_pshufb_as_unpacklo_undef:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshufb_as_unpacklo_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 2, i8 3, i8 undef, i8 undef, i8 4, i8 5, i8 undef, i8 undef, i8 6, i8 7>)
%2 = bitcast <16 x i8> %1 to <8 x i16>
%3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@@ -467,9 +467,9 @@ define <8 x i16> @combine_pshufb_as_unpacklo_undef(<16 x i8> %a0) {
}
define <16 x i8> @combine_pshufb_as_unpackhi_undef(<16 x i8> %a0) {
; ALL-LABEL: combine_pshufb_as_unpackhi_undef:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshufb_as_unpackhi_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 8, i8 undef, i8 9, i8 undef, i8 10, i8 undef, i8 11, i8 undef, i8 12, i8 undef, i8 13, i8 undef, i8 14, i8 undef, i8 15, i8 undef>)
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
ret <16 x i8> %2


@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32 --check-prefix=X86AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=X32 --check-prefix=X86AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64 --check-prefix=X64AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=X64 --check-prefix=X64AVX2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefixes=CHECK,X86,AVX2,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefixes=CHECK,X64,AVX2,X64-AVX2
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone
@@ -13,211 +13,138 @@ declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: combine_vpermil2pd_identity:
; X32: # %bb.0:
; X32-NEXT: vmovaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2pd_identity:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2pd_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a1, <2 x double> %a0, <2 x i64> <i64 2, i64 0>, i8 0)
%res1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %res0, <2 x double> undef, <2 x i64> <i64 2, i64 0>, i8 0)
ret <2 x double> %res1
}
define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x double> %a1) {
; X32-LABEL: combine_vpermil2pd256_identity:
; X32: # %bb.0:
; X32-NEXT: vmovaps %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2pd256_identity:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm1, %ymm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2pd256_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
ret <4 x double> %res1
}
define <4 x double> @combine_vpermil2pd256_0z73(<4 x double> %a0, <4 x double> %a1) {
; X32-LABEL: combine_vpermil2pd256_0z73:
; X32: # %bb.0:
; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0],zero,ymm1[3],ymm0[3]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2pd256_0z73:
; X64: # %bb.0:
; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0],zero,ymm1[3],ymm0[3]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2pd256_0z73:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0],zero,ymm1[3],ymm0[3]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 undef, i32 7, i32 3>
%res1 = shufflevector <4 x double> %res0, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
ret <4 x double> %res1
}
define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: combine_vpermil2ps_identity:
; X32: # %bb.0:
; X32-NEXT: vmovaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps_identity:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2ps_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
%res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
ret <4 x float> %res1
}
define <4 x float> @combine_vpermil2ps_1z74(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: combine_vpermil2ps_1z74:
; X32: # %bb.0:
; X32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,0]
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps_1z74:
; X64: # %bb.0:
; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,0]
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2ps_1z74:
; CHECK: # %bb.0:
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[3,0]
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 1, i32 1, i32 7, i32 4>, i8 0)
%res1 = shufflevector <4 x float> %res0, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
ret <4 x float> %res1
}
define <4 x float> @combine_vpermil2ps_02zu(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: combine_vpermil2ps_02zu:
; X32: # %bb.0:
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps_02zu:
; X64: # %bb.0:
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2ps_02zu:
; CHECK: # %bb.0:
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 undef>, i8 0)
ret <4 x float> %res0
}
define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) {
; X32-LABEL: combine_vpermil2ps256_identity:
; X32: # %bb.0:
; X32-NEXT: vmovaps %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps256_identity:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm1, %ymm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2ps256_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
%res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
ret <8 x float> %res1
}
define <8 x float> @combine_vpermil2ps256_08z945Az(<8 x float> %a0, <8 x float> %a1) {
; X32-LABEL: combine_vpermil2ps256_08z945Az:
; X32: # %bb.0:
; X32-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[0],zero,ymm1[1],ymm0[4,5],ymm1[6],zero
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps256_08z945Az:
; X64: # %bb.0:
; X64-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[0],zero,ymm1[1],ymm0[4,5],ymm1[6],zero
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2ps256_08z945Az:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm0[0],ymm1[0],zero,ymm1[1],ymm0[4,5],ymm1[6],zero
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 0, i32 1, i32 6, i32 7>, i8 0)
%res1 = shufflevector <8 x float> %res0, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 8>
ret <8 x float> %res1
}
define <8 x float> @combine_vpermil2ps256_zero(<8 x float> %a0, <8 x float> %a1) {
; X32-LABEL: combine_vpermil2ps256_zero:
; X32: # %bb.0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps256_zero:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2ps256_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>, i8 2)
ret <8 x float> %res0
}
define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: combine_vpermil2ps_blend_with_zero:
; X32: # %bb.0:
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2ps_blend_with_zero:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2ps_blend_with_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 8, i32 1, i32 2, i32 3>, i8 2)
ret <4 x float> %res0
}
define <2 x double> @combine_vpermil2pd_as_shufpd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: combine_vpermil2pd_as_shufpd:
; X32: # %bb.0:
; X32-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2pd_as_shufpd:
; X64: # %bb.0:
; X64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2pd_as_shufpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 2, i64 4>, i8 0)
ret <2 x double> %res0
}
define <4 x double> @combine_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x double> %a1) {
; X32-LABEL: combine_vpermil2pd256_as_shufpd:
; X32: # %bb.0:
; X32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2pd256_as_shufpd:
; X64: # %bb.0:
; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpermil2pd256_as_shufpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i8 0)
ret <4 x double> %res0
}
define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
; X32-LABEL: combine_vpperm_identity:
; X32: # %bb.0:
; X32-NEXT: vmovaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpperm_identity:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpperm_identity:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>)
%res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %res1
}
define <16 x i8> @combine_vpperm_zero(<16 x i8> %a0, <16 x i8> %a1) {
; X32-LABEL: combine_vpperm_zero:
; X32: # %bb.0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpperm_zero:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: combine_vpperm_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
%res2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res1, <16 x i8> undef, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
@ -225,10 +152,10 @@ define <16 x i8> @combine_vpperm_zero(<16 x i8> %a0, <16 x i8> %a1) {
}
define <16 x i8> @combine_vpperm_identity_bitcast(<16 x i8> %a0, <16 x i8> %a1) {
; X32-LABEL: combine_vpperm_identity_bitcast:
; X32: # %bb.0:
; X32-NEXT: vpaddq {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: retl
; X86-LABEL: combine_vpperm_identity_bitcast:
; X86: # %bb.0:
; X86-NEXT: vpaddq {{\.LCPI.*}}, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: combine_vpperm_identity_bitcast:
; X64: # %bb.0:
@ -244,73 +171,47 @@ define <16 x i8> @combine_vpperm_identity_bitcast(<16 x i8> %a0, <16 x i8> %a1)
}
define <16 x i8> @combine_vpperm_as_blend_with_zero(<16 x i8> %a0, <16 x i8> %a1) {
; X32-LABEL: combine_vpperm_as_blend_with_zero:
; X32: # %bb.0:
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpperm_as_blend_with_zero:
; X64: # %bb.0:
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6,7]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpperm_as_blend_with_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 0, i8 1, i8 128, i8 129, i8 4, i8 5, i8 6, i8 7, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137>)
ret <16 x i8> %res0
}
define <16 x i8> @combine_vpperm_as_unary_unpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
; X32-LABEL: combine_vpperm_as_unary_unpckhbw:
; X32: # %bb.0:
; X32-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpperm_as_unary_unpckhbw:
; X64: # %bb.0:
; X64-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpperm_as_unary_unpckhbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 undef, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
ret <16 x i8> %res0
}
define <16 x i8> @combine_vpperm_as_unpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
; X32-LABEL: combine_vpperm_as_unpckhbw:
; X32: # %bb.0:
; X32-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpperm_as_unpckhbw:
; X64: # %bb.0:
; X64-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpperm_as_unpckhbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
ret <16 x i8> %res0
}
define <16 x i8> @combine_vpperm_as_unpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
; X32-LABEL: combine_vpperm_as_unpcklbw:
; X32: # %bb.0:
; X32-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpperm_as_unpcklbw:
; X64: # %bb.0:
; X64-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpperm_as_unpcklbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 16, i8 0, i8 17, i8 1, i8 18, i8 2, i8 19, i8 3, i8 20, i8 4, i8 21, i8 5, i8 22, i8 6, i8 23, i8 7>)
ret <16 x i8> %res0
}
define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
; X32-LABEL: combine_vpperm_10zz32BA:
; X32: # %bb.0:
; X32-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1],zero,zero,zero,zero,xmm0[6,7,4,5],xmm1[6,7,4,5]
; X32-NEXT: retl
;
; X64-LABEL: combine_vpperm_10zz32BA:
; X64: # %bb.0:
; X64-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1],zero,zero,zero,zero,xmm0[6,7,4,5],xmm1[6,7,4,5]
; X64-NEXT: retq
; CHECK-LABEL: combine_vpperm_10zz32BA:
; CHECK: # %bb.0:
; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2,3,0,1],zero,zero,zero,zero,xmm0[6,7,4,5],xmm1[6,7,4,5]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%res1 = bitcast <4 x i32> %res0 to <16 x i8>
%res2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res1, <16 x i8> undef, <16 x i8> <i8 2, i8 3, i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
@ -320,34 +221,34 @@ define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
; FIXME: Duplicated load in i686
define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
; X86AVX-LABEL: buildvector_v4f32_0404:
; X86AVX: # %bb.0:
; X86AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X86AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X86AVX-NEXT: vmovaps %xmm0, (%eax)
; X86AVX-NEXT: retl
; X86-AVX-LABEL: buildvector_v4f32_0404:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X86AVX2-LABEL: buildvector_v4f32_0404:
; X86AVX2: # %bb.0:
; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT: vmovapd %xmm0, (%eax)
; X86AVX2-NEXT: retl
; X86-AVX2-LABEL: buildvector_v4f32_0404:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86-AVX2-NEXT: vmovapd %xmm0, (%eax)
; X86-AVX2-NEXT: retl
;
; X64AVX-LABEL: buildvector_v4f32_0404:
; X64AVX: # %bb.0:
; X64AVX-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0]
; X64AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64AVX-NEXT: retq
; X64-AVX-LABEL: buildvector_v4f32_0404:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
;
; X64AVX2-LABEL: buildvector_v4f32_0404:
; X64AVX2: # %bb.0:
; X64AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64AVX2-NEXT: vmovapd %xmm0, (%rdi)
; X64AVX2-NEXT: retq
; X64-AVX2-LABEL: buildvector_v4f32_0404:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-AVX2-NEXT: vmovapd %xmm0, (%rdi)
; X64-AVX2-NEXT: retq
%v0 = insertelement <4 x float> undef, float %a, i32 0
%v1 = insertelement <4 x float> %v0, float %b, i32 1
%v2 = insertelement <4 x float> %v1, float %a, i32 2
@ -357,13 +258,13 @@ define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
}
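buildvector_v4f32_0404 above is the other end of the spectrum: all four configurations emit different code, so nothing can be shared and each run keeps its fully-qualified block. As an informal summary (not the output of any tool), the prefix lists on this file's RUN lines resolve like this:

;   output identical across all four runs -> one CHECK block
;   identical per architecture            -> X86 / X64 blocks
;   identical per feature set             -> AVX / AVX2 blocks
;   different everywhere                  -> X86-AVX, X86-AVX2, X64-AVX, X64-AVX2 blocks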
define void @buildvector_v4f32_07z6(float %a, <4 x float> %b, <4 x float>* %ptr) {
; X32-LABEL: buildvector_v4f32_07z6:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm1[0],xmm0[3],zero,xmm0[2]
; X32-NEXT: vmovaps %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: buildvector_v4f32_07z6:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm1[0],xmm0[3],zero,xmm0[2]
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: buildvector_v4f32_07z6:
; X64: # %bb.0:
@ -381,82 +282,57 @@ define void @buildvector_v4f32_07z6(float %a, <4 x float> %b, <4 x float>* %ptr)
}
define <2 x double> @constant_fold_vpermil2pd() {
; X32-LABEL: constant_fold_vpermil2pd:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2pd:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermil2pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,2.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> <double 1.0, double 2.0>, <2 x double> <double -2.0, double -1.0>, <2 x i64> <i64 4, i64 2>, i8 2)
ret <2 x double> %1
}
define <4 x double> @constant_fold_vpermil2pd_256() {
; X32-LABEL: constant_fold_vpermil2pd_256:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-4.0E+0,0.0E+0,4.0E+0,3.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2pd_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-4.0E+0,0.0E+0,4.0E+0,3.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermil2pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [-4.0E+0,0.0E+0,4.0E+0,3.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x double> <double -4.0, double -3.0, double -2.0, double -1.0>, <4 x i64> <i64 4, i64 8, i64 2, i64 0>, i8 2)
ret <4 x double> %1
}
define <4 x float> @constant_fold_vpermil2ps() {
; X32-LABEL: constant_fold_vpermil2ps:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-4.0E+0,1.0E+0,3.0E+0,0.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2ps:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-4.0E+0,1.0E+0,3.0E+0,0.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermil2ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-4.0E+0,1.0E+0,3.0E+0,0.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float> <float -4.0, float -3.0, float -2.0, float -1.0>, <4 x i32> <i32 4, i32 0, i32 2, i32 8>, i8 2)
ret <4 x float> %1
}
define <8 x float> @constant_fold_vpermil2ps_256() {
; X32-LABEL: constant_fold_vpermil2ps_256:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-8.0E+0,1.0E+0,3.0E+0,0.0E+0,5.0E+0,0.0E+0,5.0E+0,7.0E+0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermil2ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-8.0E+0,1.0E+0,3.0E+0,0.0E+0,5.0E+0,0.0E+0,5.0E+0,7.0E+0]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpermil2ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [-8.0E+0,1.0E+0,3.0E+0,0.0E+0,5.0E+0,0.0E+0,5.0E+0,7.0E+0]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x float> <float -8.0, float -7.0, float -6.0, float -5.0, float -4.0, float -3.0, float -2.0, float -1.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 8, i32 0, i32 8, i32 0, i32 2>, i8 2)
ret <8 x float> %1
}
define <16 x i8> @constant_fold_vpperm() {
; X32-LABEL: constant_fold_vpperm:
; X32: # %bb.0:
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpperm:
; X64: # %bb.0:
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X64-NEXT: retq
; CHECK-LABEL: constant_fold_vpperm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; CHECK-NEXT: ret{{[l|q]}}
%1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15>, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>)
ret <16 x i8> %1
}
define <4 x float> @PR31296(i8* %in) {
; X32-LABEL: PR31296:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
; X32-NEXT: retl
; X86-LABEL: PR31296:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
; X86-NEXT: retl
;
; X64-LABEL: PR31296:
; X64: # %bb.0: # %entry
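As the NOTE header at the top of each file says, none of these check blocks are written by hand; after the RUN lines change, the assertions are regenerated. A sketch of a typical invocation from the llvm source root (the --llc-binary flag and the build path are assumptions about the local setup):

$ python utils/update_llc_test_checks.py --llc-binary=build/bin/llc test/CodeGen/X86/vector-shuffle-combining-xop.ll

The script reruns each RUN line, compares the output across the prefix sets, and rewrites every check block under the most widely shared prefix that still matches.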

View File

@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-FAST
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
@ -18,9 +18,9 @@ declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd1:
; ALL: # %bb.0: # %entry
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshufd1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
%b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
%c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
@ -28,9 +28,9 @@ entry:
}
define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd2:
; ALL: # %bb.0: # %entry
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshufd2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
%b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
%b.cast = bitcast <4 x i32> %b to <8 x i16>
@ -41,9 +41,9 @@ entry:
}
define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd3:
; ALL: # %bb.0: # %entry
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshufd3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
%b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
%b.cast = bitcast <4 x i32> %b to <8 x i16>
@ -113,9 +113,9 @@ entry:
}
define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw1:
; ALL: # %bb.0: # %entry
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshuflw1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
%b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
%c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
@ -123,9 +123,9 @@ entry:
}
define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw2:
; ALL: # %bb.0: # %entry
; ALL-NEXT: retq
; CHECK-LABEL: combine_pshuflw2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: retq
entry:
%b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
%c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
@ -811,9 +811,9 @@ define <4 x i32> @combine_nested_undef_test12(<4 x i32> %A, <4 x i32> %B) {
; The following pair of shuffles is folded into vector %A.
define <4 x i32> @combine_nested_undef_test13(<4 x i32> %A, <4 x i32> %B) {
; ALL-LABEL: combine_nested_undef_test13:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_nested_undef_test13:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
ret <4 x i32> %2
@ -1371,9 +1371,9 @@ define <4 x i32> @combine_test10(<4 x i32> %a, <4 x i32> %b) {
}
define <4 x float> @combine_test11(<4 x float> %a, <4 x float> %b) {
; ALL-LABEL: combine_test11:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_test11:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x float> %2
@ -1464,9 +1464,9 @@ define <4 x float> @combine_test15(<4 x float> %a, <4 x float> %b) {
}
define <4 x i32> @combine_test16(<4 x i32> %a, <4 x i32> %b) {
; ALL-LABEL: combine_test16:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_test16:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x i32> %2
@ -2161,9 +2161,9 @@ define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
define <4 x float> @combine_undef_input_test6(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test6:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_undef_input_test6:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
ret <4 x float> %2
@ -2235,9 +2235,9 @@ define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
}
define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test10:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_undef_input_test10:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
ret <4 x float> %2
@ -2351,9 +2351,9 @@ define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; combined into a single legal shuffle operation.
define <4 x float> @combine_undef_input_test16(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test16:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_undef_input_test16:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
ret <4 x float> %2
@ -2425,9 +2425,9 @@ define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
}
define <4 x float> @combine_undef_input_test20(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test20:
; ALL: # %bb.0:
; ALL-NEXT: retq
; CHECK-LABEL: combine_undef_input_test20:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
ret <4 x float> %2
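The empty check blocks (nothing but retq) are the point of these tests: the DAG combiner has to prove that the nested shuffles cancel before it can delete them. A hypothetical miniature in the same style, where applying the reversal mask twice returns every lane to its origin, so llc emits only the return:

define <4 x float> @nested_identity(<4 x float> %a) {
; CHECK-LABEL: nested_identity:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %2
}

Lane i of %1 is a[3-i], so lane i of %2 is a[3-(3-i)] = a[i]; the combined mask is the identity <0,1,2,3> and the shuffle pair folds away.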