forked from OSchip/llvm-project
[X86][XOP] createVariablePermute - use VPERMIL2 for v8i32/v4i64 variable permutes
llvm-svn: 327222
This commit is contained in:
parent
5fdbdeb542
commit
de7f3f0f91
|
@ -8023,12 +8023,33 @@ SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
|
|||
case MVT::v8i32:
|
||||
if (Subtarget.hasAVX2())
|
||||
Opcode = X86ISD::VPERMV;
|
||||
else if (Subtarget.hasXOP()) {
|
||||
SrcVec = DAG.getBitcast(MVT::v8f32, SrcVec);
|
||||
SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
|
||||
{0, 1, 2, 3, 0, 1, 2, 3});
|
||||
SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
|
||||
{4, 5, 6, 7, 4, 5, 6, 7});
|
||||
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32,
|
||||
LoLo, HiHi, IndicesVec,
|
||||
DAG.getConstant(0, DL, MVT::i8)));
|
||||
}
|
||||
break;
|
||||
case MVT::v4i64:
|
||||
case MVT::v4f64:
|
||||
if (Subtarget.hasVLX())
|
||||
Opcode = X86ISD::VPERMV;
|
||||
else if (Subtarget.hasAVX2()) {
|
||||
else if (Subtarget.hasXOP()) {
|
||||
SrcVec = DAG.getBitcast(MVT::v4f64, SrcVec);
|
||||
SDValue LoLo =
|
||||
DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {0, 1, 0, 1});
|
||||
SDValue HiHi =
|
||||
DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {2, 3, 2, 3});
|
||||
// VPERMIL2PD reads its element select from bit #1 (not bit #0) of each index element, so double IndicesVec to shift every index up by one bit.
|
||||
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
|
||||
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64,
|
||||
LoLo, HiHi, IndicesVec,
|
||||
DAG.getConstant(0, DL, MVT::i8)));
|
||||
} else if (Subtarget.hasAVX2()) {
|
||||
Opcode = X86ISD::VPERMV;
|
||||
ShuffleVT = MVT::v8f32;
|
||||
}
|
||||
|
|
|
@ -12,29 +12,13 @@
|
|||
define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
|
||||
; XOP-LABEL: var_shuffle_v4i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: pushq %rbp
|
||||
; XOP-NEXT: movq %rsp, %rbp
|
||||
; XOP-NEXT: andq $-32, %rsp
|
||||
; XOP-NEXT: subq $64, %rsp
|
||||
; XOP-NEXT: vmovq %xmm1, %rax
|
||||
; XOP-NEXT: andl $3, %eax
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; XOP-NEXT: andl $3, %ecx
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm3
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; XOP-NEXT: vmovq %xmm1, %rdx
|
||||
; XOP-NEXT: andl $3, %edx
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; XOP-NEXT: andl $3, %esi
|
||||
; XOP-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; XOP-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; XOP-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: movq %rbp, %rsp
|
||||
; XOP-NEXT: popq %rbp
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; XOP-NEXT: vpermil2pd $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v4i64:
|
||||
|
@ -133,39 +117,9 @@ define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
|
|||
define <8 x i32> @var_shuffle_v8i32(<8 x i32> %v, <8 x i32> %indices) nounwind {
|
||||
; XOP-LABEL: var_shuffle_v8i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: pushq %rbp
|
||||
; XOP-NEXT: movq %rsp, %rbp
|
||||
; XOP-NEXT: andq $-32, %rsp
|
||||
; XOP-NEXT: subq $64, %rsp
|
||||
; XOP-NEXT: vmovd %xmm1, %r8d
|
||||
; XOP-NEXT: vpextrd $1, %xmm1, %r9d
|
||||
; XOP-NEXT: vpextrd $2, %xmm1, %r10d
|
||||
; XOP-NEXT: vpextrd $3, %xmm1, %esi
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; XOP-NEXT: vmovd %xmm1, %edi
|
||||
; XOP-NEXT: vpextrd $1, %xmm1, %eax
|
||||
; XOP-NEXT: vpextrd $2, %xmm1, %ecx
|
||||
; XOP-NEXT: vpextrd $3, %xmm1, %edx
|
||||
; XOP-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; XOP-NEXT: andl $7, %r8d
|
||||
; XOP-NEXT: andl $7, %r9d
|
||||
; XOP-NEXT: andl $7, %r10d
|
||||
; XOP-NEXT: andl $7, %esi
|
||||
; XOP-NEXT: andl $7, %edi
|
||||
; XOP-NEXT: andl $7, %eax
|
||||
; XOP-NEXT: andl $7, %ecx
|
||||
; XOP-NEXT: andl $7, %edx
|
||||
; XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vpinsrd $1, (%rsp,%rax,4), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpinsrd $2, (%rsp,%rcx,4), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpinsrd $3, (%rsp,%rdx,4), %xmm0, %xmm0
|
||||
; XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vpinsrd $1, (%rsp,%r9,4), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpinsrd $2, (%rsp,%r10,4), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpinsrd $3, (%rsp,%rsi,4), %xmm1, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: movq %rbp, %rsp
|
||||
; XOP-NEXT: popq %rbp
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpermil2ps $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v8i32:
|
||||
|
@ -1371,27 +1325,13 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
|
|||
define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) nounwind {
|
||||
; XOP-LABEL: var_shuffle_v4f64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: pushq %rbp
|
||||
; XOP-NEXT: movq %rsp, %rbp
|
||||
; XOP-NEXT: andq $-32, %rsp
|
||||
; XOP-NEXT: subq $64, %rsp
|
||||
; XOP-NEXT: vmovq %xmm1, %rax
|
||||
; XOP-NEXT: andl $3, %eax
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; XOP-NEXT: andl $3, %ecx
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm3
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; XOP-NEXT: vmovq %xmm1, %rdx
|
||||
; XOP-NEXT: andl $3, %edx
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; XOP-NEXT: andl $3, %esi
|
||||
; XOP-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; XOP-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; XOP-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: movq %rbp, %rsp
|
||||
; XOP-NEXT: popq %rbp
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; XOP-NEXT: vpermil2pd $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v4f64:
|
||||
|
@ -1488,39 +1428,9 @@ define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) noun
|
|||
define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwind {
|
||||
; XOP-LABEL: var_shuffle_v8f32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: pushq %rbp
|
||||
; XOP-NEXT: movq %rsp, %rbp
|
||||
; XOP-NEXT: andq $-32, %rsp
|
||||
; XOP-NEXT: subq $64, %rsp
|
||||
; XOP-NEXT: vmovd %xmm1, %esi
|
||||
; XOP-NEXT: vpextrd $1, %xmm1, %r8d
|
||||
; XOP-NEXT: vpextrd $2, %xmm1, %r9d
|
||||
; XOP-NEXT: vpextrd $3, %xmm1, %r10d
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; XOP-NEXT: vmovd %xmm1, %edx
|
||||
; XOP-NEXT: vpextrd $1, %xmm1, %edi
|
||||
; XOP-NEXT: vpextrd $2, %xmm1, %eax
|
||||
; XOP-NEXT: vpextrd $3, %xmm1, %ecx
|
||||
; XOP-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; XOP-NEXT: andl $7, %esi
|
||||
; XOP-NEXT: andl $7, %r8d
|
||||
; XOP-NEXT: andl $7, %r9d
|
||||
; XOP-NEXT: andl $7, %r10d
|
||||
; XOP-NEXT: andl $7, %edx
|
||||
; XOP-NEXT: andl $7, %edi
|
||||
; XOP-NEXT: andl $7, %eax
|
||||
; XOP-NEXT: andl $7, %ecx
|
||||
; XOP-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
||||
; XOP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: movq %rbp, %rsp
|
||||
; XOP-NEXT: popq %rbp
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpermil2ps $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v8f32:
|
||||
|
@ -1598,23 +1508,14 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
|
|||
define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices) nounwind {
|
||||
; XOP-LABEL: var_shuffle_v4i64_from_v2i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vmovq %xmm1, %rax
|
||||
; XOP-NEXT: andl $1, %eax
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; XOP-NEXT: andl $1, %ecx
|
||||
; XOP-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm3
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; XOP-NEXT: vmovq %xmm1, %rdx
|
||||
; XOP-NEXT: andl $1, %edx
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; XOP-NEXT: andl $1, %esi
|
||||
; XOP-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; XOP-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; XOP-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; XOP-NEXT: vpermil2pd $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v4i64_from_v2i64:
|
||||
|
@ -1712,33 +1613,10 @@ define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices)
|
|||
define <8 x i32> @var_shuffle_v8i32_from_v4i32(<4 x i32> %v, <8 x i32> %indices) unnamed_addr nounwind {
|
||||
; XOP-LABEL: var_shuffle_v8i32_from_v4i32:
|
||||
; XOP: # %bb.0: # %entry
|
||||
; XOP-NEXT: vmovd %xmm1, %r8d
|
||||
; XOP-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; XOP-NEXT: andl $3, %r8d
|
||||
; XOP-NEXT: vpextrd $1, %xmm1, %r9d
|
||||
; XOP-NEXT: andl $3, %r9d
|
||||
; XOP-NEXT: vpextrd $2, %xmm1, %r10d
|
||||
; XOP-NEXT: andl $3, %r10d
|
||||
; XOP-NEXT: vpextrd $3, %xmm1, %esi
|
||||
; XOP-NEXT: andl $3, %esi
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; XOP-NEXT: vmovd %xmm0, %edi
|
||||
; XOP-NEXT: andl $3, %edi
|
||||
; XOP-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; XOP-NEXT: andl $3, %eax
|
||||
; XOP-NEXT: vpextrd $2, %xmm0, %ecx
|
||||
; XOP-NEXT: andl $3, %ecx
|
||||
; XOP-NEXT: vpextrd $3, %xmm0, %edx
|
||||
; XOP-NEXT: andl $3, %edx
|
||||
; XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vpinsrd $1, -24(%rsp,%rax,4), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpinsrd $2, -24(%rsp,%rcx,4), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpinsrd $3, -24(%rsp,%rdx,4), %xmm0, %xmm0
|
||||
; XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vpinsrd $1, -24(%rsp,%r9,4), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpinsrd $2, -24(%rsp,%r10,4), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpinsrd $3, -24(%rsp,%rsi,4), %xmm1, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpermil2ps $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v8i32_from_v4i32:
|
||||
|
@ -2880,21 +2758,14 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
|
|||
define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %indices) nounwind {
|
||||
; XOP-LABEL: var_shuffle_v4f64_from_v2f64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vmovq %xmm1, %rax
|
||||
; XOP-NEXT: andl $1, %eax
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; XOP-NEXT: andl $1, %ecx
|
||||
; XOP-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm3
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; XOP-NEXT: vmovq %xmm1, %rdx
|
||||
; XOP-NEXT: andl $1, %edx
|
||||
; XOP-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; XOP-NEXT: andl $1, %esi
|
||||
; XOP-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; XOP-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; XOP-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; XOP-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: vpaddq %xmm1, %xmm1, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
|
||||
; XOP-NEXT: vpermil2pd $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v4f64_from_v2f64:
|
||||
|
@ -2990,33 +2861,10 @@ define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %in
|
|||
define <8 x float> @var_shuffle_v8f32_from_v4f32(<4 x float> %v, <8 x i32> %indices) unnamed_addr nounwind {
|
||||
; XOP-LABEL: var_shuffle_v8f32_from_v4f32:
|
||||
; XOP: # %bb.0: # %entry
|
||||
; XOP-NEXT: vmovd %xmm1, %r8d
|
||||
; XOP-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; XOP-NEXT: andl $3, %r8d
|
||||
; XOP-NEXT: vpextrd $1, %xmm1, %r9d
|
||||
; XOP-NEXT: andl $3, %r9d
|
||||
; XOP-NEXT: vpextrd $2, %xmm1, %r10d
|
||||
; XOP-NEXT: andl $3, %r10d
|
||||
; XOP-NEXT: vpextrd $3, %xmm1, %esi
|
||||
; XOP-NEXT: andl $3, %esi
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; XOP-NEXT: vmovd %xmm0, %edi
|
||||
; XOP-NEXT: andl $3, %edi
|
||||
; XOP-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; XOP-NEXT: andl $3, %eax
|
||||
; XOP-NEXT: vpextrd $2, %xmm0, %ecx
|
||||
; XOP-NEXT: andl $3, %ecx
|
||||
; XOP-NEXT: vpextrd $3, %xmm0, %edx
|
||||
; XOP-NEXT: andl $3, %edx
|
||||
; XOP-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
||||
; XOP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
|
||||
; XOP-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpermil2ps $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v8f32_from_v4f32:
|
||||
|
|
Loading…
Reference in New Issue