forked from OSchip/llvm-project
[X86][SSE] LowerBUILD_VECTORAsVariablePermute - add support for scaling index vectors
This allows us to use PSHUFB for v8i16/v4i32 and VPERMD/VPERMPS for v4i64/v4f64 variable shuffles. Differential Revision: https://reviews.llvm.org/D42487 llvm-svn: 323987
This commit is contained in:
parent
6691e112ce
commit
1a8cefc328
|
@ -7818,8 +7818,6 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
|
|||
// TODO: Handle undefs
|
||||
// TODO: Utilize pshufb and zero mask blending to support more efficient
|
||||
// construction of vectors with constant-0 elements.
|
||||
// TODO: Use smaller-element vectors of same width, and "interpolate" the
|
||||
// indices, when no native operation available.
|
||||
static SDValue
|
||||
LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
|
@ -7833,11 +7831,22 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
|
|||
if (Subtarget.hasSSE3())
|
||||
Opcode = X86ISD::PSHUFB;
|
||||
break;
|
||||
case MVT::v8i16:
|
||||
if (Subtarget.hasVLX() && Subtarget.hasBWI())
|
||||
Opcode = X86ISD::VPERMV;
|
||||
else if (Subtarget.hasSSE3()) {
|
||||
Opcode = X86ISD::PSHUFB;
|
||||
ShuffleVT = MVT::v16i8;
|
||||
}
|
||||
break;
|
||||
case MVT::v4f32:
|
||||
case MVT::v4i32:
|
||||
if (Subtarget.hasAVX()) {
|
||||
Opcode = X86ISD::VPERMILPV;
|
||||
ShuffleVT = MVT::v4f32;
|
||||
} else if (Subtarget.hasSSE3()) {
|
||||
Opcode = X86ISD::PSHUFB;
|
||||
ShuffleVT = MVT::v16i8;
|
||||
}
|
||||
break;
|
||||
case MVT::v2f64:
|
||||
|
@ -7856,6 +7865,10 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
|
|||
case MVT::v4f64:
|
||||
if (Subtarget.hasVLX())
|
||||
Opcode = X86ISD::VPERMV;
|
||||
else if (Subtarget.hasAVX2()) {
|
||||
Opcode = X86ISD::VPERMV;
|
||||
ShuffleVT = MVT::v8f32;
|
||||
}
|
||||
break;
|
||||
case MVT::v16f32:
|
||||
case MVT::v8f64:
|
||||
|
@ -7868,7 +7881,6 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
|
|||
if (Subtarget.hasBWI())
|
||||
Opcode = X86ISD::VPERMV;
|
||||
break;
|
||||
case MVT::v8i16:
|
||||
case MVT::v16i16:
|
||||
if (Subtarget.hasVLX() && Subtarget.hasBWI())
|
||||
Opcode = X86ISD::VPERMV;
|
||||
|
@ -7927,8 +7939,8 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
|
|||
unsigned Opcode = LegalPermuteOpcode(VT, ShuffleVT);
|
||||
if (!Opcode)
|
||||
return SDValue();
|
||||
assert(VT.getScalarSizeInBits() == ShuffleVT.getScalarSizeInBits() &&
|
||||
VT.getVectorNumElements() == ShuffleVT.getVectorNumElements() &&
|
||||
assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) &&
|
||||
(VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 &&
|
||||
"Illegal variable permute shuffle type");
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
@ -7950,6 +7962,33 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
|
|||
SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
|
||||
}
|
||||
|
||||
uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits();
|
||||
if (Scale > 1) {
|
||||
assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale");
|
||||
unsigned ShuffleBits = ShuffleVT.getScalarSizeInBits();
|
||||
uint64_t IndexScale = 0;
|
||||
uint64_t IndexOffset = 0;
|
||||
|
||||
// If we're scaling a smaller permute op, then we need to repeat the indices,
|
||||
// scaling and offsetting them as well.
|
||||
// e.g. v4i32 -> v16i8 (Scale = 4)
|
||||
// IndexScale = v4i32 Splat(4 << 24 | 4 << 16 | 4 << 8 | 4)
|
||||
// IndexOffset = v4i32 Splat(3 << 24 | 2 << 16 | 1 << 8 | 0)
|
||||
for (uint64_t i = 0; i != Scale; ++i) {
|
||||
IndexScale |= Scale << (i * ShuffleBits);
|
||||
IndexOffset |= i << (i * ShuffleBits);
|
||||
}
|
||||
|
||||
SDLoc DL(IndicesVec);
|
||||
IndicesVec = DAG.getNode(ISD::MUL, DL, IndicesVT, IndicesVec,
|
||||
DAG.getConstant(IndexScale, DL, IndicesVT));
|
||||
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec,
|
||||
DAG.getConstant(IndexOffset, DL, IndicesVT));
|
||||
}
|
||||
|
||||
EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger();
|
||||
IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec);
|
||||
|
||||
SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
|
||||
SDValue Res =
|
||||
Opcode == X86ISD::VPERMV
|
||||
|
|
|
@ -37,25 +37,15 @@ define <2 x i64> @var_shuffle_v2i64(<2 x i64> %v, <2 x i64> %indices) nounwind {
|
|||
define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind {
|
||||
; SSSE3-LABEL: var_shuffle_v4i32:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movd %xmm1, %eax
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm2, %ecx
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm2, %edx
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %esi
|
||||
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSSE3-NEXT: andl $3, %eax
|
||||
; SSSE3-NEXT: andl $3, %ecx
|
||||
; SSSE3-NEXT: andl $3, %edx
|
||||
; SSSE3-NEXT: andl $3, %esi
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [67372036,67372036,67372036,67372036]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pmuludq %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; SSSE3-NEXT: pmuludq %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSSE3-NEXT: paddd {{.*}}(%rip), %xmm1
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: var_shuffle_v4i32:
|
||||
|
@ -80,76 +70,16 @@ define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind {
|
|||
define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
|
||||
; SSSE3-LABEL: var_shuffle_v8i16:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movd %xmm1, %r8d
|
||||
; SSSE3-NEXT: pextrw $1, %xmm1, %r9d
|
||||
; SSSE3-NEXT: pextrw $2, %xmm1, %r10d
|
||||
; SSSE3-NEXT: pextrw $3, %xmm1, %esi
|
||||
; SSSE3-NEXT: pextrw $4, %xmm1, %edi
|
||||
; SSSE3-NEXT: pextrw $5, %xmm1, %eax
|
||||
; SSSE3-NEXT: pextrw $6, %xmm1, %ecx
|
||||
; SSSE3-NEXT: pextrw $7, %xmm1, %edx
|
||||
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSSE3-NEXT: andl $7, %r8d
|
||||
; SSSE3-NEXT: andl $7, %r9d
|
||||
; SSSE3-NEXT: andl $7, %r10d
|
||||
; SSSE3-NEXT: andl $7, %esi
|
||||
; SSSE3-NEXT: andl $7, %edi
|
||||
; SSSE3-NEXT: andl $7, %eax
|
||||
; SSSE3-NEXT: andl $7, %ecx
|
||||
; SSSE3-NEXT: andl $7, %edx
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%rdx,2), %edx
|
||||
; SSSE3-NEXT: movd %edx, %xmm0
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %ecx
|
||||
; SSSE3-NEXT: movd %ecx, %xmm1
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%rdi,2), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm2
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm1
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm3
|
||||
; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSSE3-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSSE3-NEXT: paddw {{.*}}(%rip), %xmm1
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; AVXNOVLBW-LABEL: var_shuffle_v8i16:
|
||||
; AVXNOVLBW: # %bb.0:
|
||||
; AVXNOVLBW-NEXT: vmovd %xmm1, %eax
|
||||
; AVXNOVLBW-NEXT: vpextrw $1, %xmm1, %r10d
|
||||
; AVXNOVLBW-NEXT: vpextrw $2, %xmm1, %ecx
|
||||
; AVXNOVLBW-NEXT: vpextrw $3, %xmm1, %edx
|
||||
; AVXNOVLBW-NEXT: vpextrw $4, %xmm1, %esi
|
||||
; AVXNOVLBW-NEXT: vpextrw $5, %xmm1, %edi
|
||||
; AVXNOVLBW-NEXT: vpextrw $6, %xmm1, %r8d
|
||||
; AVXNOVLBW-NEXT: vpextrw $7, %xmm1, %r9d
|
||||
; AVXNOVLBW-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVXNOVLBW-NEXT: andl $7, %eax
|
||||
; AVXNOVLBW-NEXT: andl $7, %r10d
|
||||
; AVXNOVLBW-NEXT: andl $7, %ecx
|
||||
; AVXNOVLBW-NEXT: andl $7, %edx
|
||||
; AVXNOVLBW-NEXT: andl $7, %esi
|
||||
; AVXNOVLBW-NEXT: andl $7, %edi
|
||||
; AVXNOVLBW-NEXT: andl $7, %r8d
|
||||
; AVXNOVLBW-NEXT: andl $7, %r9d
|
||||
; AVXNOVLBW-NEXT: movzwl -24(%rsp,%rax,2), %eax
|
||||
; AVXNOVLBW-NEXT: vmovd %eax, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpinsrw $1, -24(%rsp,%r10,2), %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpinsrw $2, -24(%rsp,%rcx,2), %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpinsrw $3, -24(%rsp,%rdx,2), %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpinsrw $4, -24(%rsp,%rsi,2), %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpinsrw $5, -24(%rsp,%rdi,2), %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpinsrw $6, -24(%rsp,%r8,2), %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpinsrw $7, -24(%rsp,%r9,2), %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVXNOVLBW-NEXT: vpaddw {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVXNOVLBW-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVXNOVLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: var_shuffle_v8i16:
|
||||
|
@ -273,25 +203,15 @@ define <2 x double> @var_shuffle_v2f64(<2 x double> %v, <2 x i64> %indices) noun
|
|||
define <4 x float> @var_shuffle_v4f32(<4 x float> %v, <4 x i32> %indices) nounwind {
|
||||
; SSSE3-LABEL: var_shuffle_v4f32:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movd %xmm1, %eax
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm2, %ecx
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSSE3-NEXT: movd %xmm2, %edx
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
|
||||
; SSSE3-NEXT: movd %xmm1, %esi
|
||||
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSSE3-NEXT: andl $3, %eax
|
||||
; SSSE3-NEXT: andl $3, %ecx
|
||||
; SSSE3-NEXT: andl $3, %edx
|
||||
; SSSE3-NEXT: andl $3, %esi
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [67372036,67372036,67372036,67372036]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
|
||||
; SSSE3-NEXT: pmuludq %xmm2, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; SSSE3-NEXT: pmuludq %xmm2, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSSE3-NEXT: paddd {{.*}}(%rip), %xmm1
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: var_shuffle_v4f32:
|
||||
|
|
|
@ -38,83 +38,44 @@ define <4 x i64> @var_shuffle_v4i64(<4 x i64> %v, <4 x i64> %indices) nounwind {
|
|||
;
|
||||
; AVX2-LABEL: var_shuffle_v4i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: pushq %rbp
|
||||
; AVX2-NEXT: movq %rsp, %rbp
|
||||
; AVX2-NEXT: andq $-32, %rsp
|
||||
; AVX2-NEXT: subq $64, %rsp
|
||||
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||
; AVX2-NEXT: andl $3, %eax
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX2-NEXT: andl $3, %ecx
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX2-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX2-NEXT: andl $3, %edx
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX2-NEXT: andl $3, %esi
|
||||
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: movq %rbp, %rsp
|
||||
; AVX2-NEXT: popq %rbp
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: var_shuffle_v4i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: pushq %rbp
|
||||
; AVX512F-NEXT: movq %rsp, %rbp
|
||||
; AVX512F-NEXT: andq $-32, %rsp
|
||||
; AVX512F-NEXT: subq $64, %rsp
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512F-NEXT: andl $3, %eax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512F-NEXT: andl $3, %ecx
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512F-NEXT: andl $3, %edx
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512F-NEXT: andl $3, %esi
|
||||
; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: movq %rbp, %rsp
|
||||
; AVX512F-NEXT: popq %rbp
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX512F-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: var_shuffle_v4i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: pushq %rbp
|
||||
; AVX512DQ-NEXT: movq %rsp, %rbp
|
||||
; AVX512DQ-NEXT: andq $-32, %rsp
|
||||
; AVX512DQ-NEXT: subq $64, %rsp
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512DQ-NEXT: andl $3, %eax
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512DQ-NEXT: andl $3, %ecx
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512DQ-NEXT: andl $3, %edx
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512DQ-NEXT: andl $3, %esi
|
||||
; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: movq %rbp, %rsp
|
||||
; AVX512DQ-NEXT: popq %rbp
|
||||
; AVX512DQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512DQ-NEXT: vpmullq %zmm2, %zmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512DQ-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_shuffle_v4i64:
|
||||
|
@ -1348,77 +1309,44 @@ define <4 x double> @var_shuffle_v4f64(<4 x double> %v, <4 x i64> %indices) noun
|
|||
;
|
||||
; AVX2-LABEL: var_shuffle_v4f64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: pushq %rbp
|
||||
; AVX2-NEXT: movq %rsp, %rbp
|
||||
; AVX2-NEXT: andq $-32, %rsp
|
||||
; AVX2-NEXT: subq $64, %rsp
|
||||
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||
; AVX2-NEXT: andl $3, %eax
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX2-NEXT: andl $3, %ecx
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX2-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX2-NEXT: andl $3, %edx
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX2-NEXT: andl $3, %esi
|
||||
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: movq %rbp, %rsp
|
||||
; AVX2-NEXT: popq %rbp
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: var_shuffle_v4f64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: pushq %rbp
|
||||
; AVX512F-NEXT: movq %rsp, %rbp
|
||||
; AVX512F-NEXT: andq $-32, %rsp
|
||||
; AVX512F-NEXT: subq $64, %rsp
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512F-NEXT: andl $3, %eax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512F-NEXT: andl $3, %ecx
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512F-NEXT: andl $3, %edx
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512F-NEXT: andl $3, %esi
|
||||
; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: movq %rbp, %rsp
|
||||
; AVX512F-NEXT: popq %rbp
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX512F-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: var_shuffle_v4f64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: pushq %rbp
|
||||
; AVX512DQ-NEXT: movq %rsp, %rbp
|
||||
; AVX512DQ-NEXT: andq $-32, %rsp
|
||||
; AVX512DQ-NEXT: subq $64, %rsp
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512DQ-NEXT: andl $3, %eax
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512DQ-NEXT: andl $3, %ecx
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512DQ-NEXT: andl $3, %edx
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512DQ-NEXT: andl $3, %esi
|
||||
; AVX512DQ-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: movq %rbp, %rsp
|
||||
; AVX512DQ-NEXT: popq %rbp
|
||||
; AVX512DQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512DQ-NEXT: vpmullq %zmm2, %zmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512DQ-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_shuffle_v4f64:
|
||||
|
@ -1542,65 +1470,47 @@ define <4 x i64> @var_shuffle_v4i64_from_v2i64(<2 x i64> %v, <4 x i64> %indices)
|
|||
;
|
||||
; AVX2-LABEL: var_shuffle_v4i64_from_v2i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX2-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX2-NEXT: andl $1, %esi
|
||||
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: var_shuffle_v4i64_from_v2i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512F-NEXT: andl $1, %eax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512F-NEXT: andl $1, %ecx
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512F-NEXT: andl $1, %edx
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512F-NEXT: andl $1, %esi
|
||||
; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX512F-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: var_shuffle_v4i64_from_v2i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512DQ-NEXT: andl $1, %eax
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512DQ-NEXT: andl $1, %ecx
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512DQ-NEXT: andl $1, %edx
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512DQ-NEXT: andl $1, %esi
|
||||
; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512DQ-NEXT: vpmullq %zmm2, %zmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512DQ-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_shuffle_v4i64_from_v2i64:
|
||||
|
@ -2768,59 +2678,47 @@ define <4 x double> @var_shuffle_v4f64_from_v2f64(<2 x double> %v, <4 x i64> %in
|
|||
;
|
||||
; AVX2-LABEL: var_shuffle_v4f64_from_v2f64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||
; AVX2-NEXT: andl $1, %eax
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX2-NEXT: andl $1, %ecx
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX2-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX2-NEXT: andl $1, %edx
|
||||
; AVX2-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX2-NEXT: andl $1, %esi
|
||||
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: var_shuffle_v4f64_from_v2f64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512F-NEXT: andl $1, %eax
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512F-NEXT: andl $1, %ecx
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512F-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512F-NEXT: andl $1, %edx
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512F-NEXT: andl $1, %esi
|
||||
; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,2,2,2]
|
||||
; AVX512F-NEXT: vpmuludq %ymm2, %ymm1, %ymm2
|
||||
; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm3
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm4 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpmuludq %ymm4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512F-NEXT: vpaddq %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: var_shuffle_v4f64_from_v2f64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rax
|
||||
; AVX512DQ-NEXT: andl $1, %eax
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; AVX512DQ-NEXT: andl $1, %ecx
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512DQ-NEXT: andl $1, %edx
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512DQ-NEXT: andl $1, %esi
|
||||
; AVX512DQ-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512DQ-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [8589934594,8589934594,8589934594,8589934594]
|
||||
; AVX512DQ-NEXT: vpmullq %zmm2, %zmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
|
||||
; AVX512DQ-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: var_shuffle_v4f64_from_v2f64:
|
||||
|
|
Loading…
Reference in New Issue