[x86] try harder to scalarize a vector load with extracted integer op uses

This is a retry of b4b97ec813, which was reverted because it could cause
miscompiles by illegally reordering memory operations. A new test based on
#53695 is added here to verify we do not have that same problem.

extract_vec_elt (load X), C --> scalar load (X+C)

As noted in the comment, DAGCombiner has this fold -- and the code in this
patch is adapted from DAGCombiner::scalarizeExtractedVectorLoad() -- but
x86 should benefit even if the loaded vector has other uses, as long as we
apply some other x86-specific conditions. The motivating example from #50310
is shown in vec_int_to_fp.ll.

Fixes #50310
Fixes #53695

Differential Revision: https://reviews.llvm.org/D118376
commit c486b82cfbe5f6e1ca5a8716b27ea2c2a7f04c8d (parent 83ccce6ced)
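As a minimal before/after sketch of the fold (an illustration mirroring the extractelement-load.ll tests below, not a new test), for

  %v = load <4 x i32>, <4 x i32>* %p
  %e = extractelement <4 x i32> %v, i32 2

the extracted element can now be compiled to a single scalar load such as "movl 8(%rdi), %eax" (element 2 of <4 x i32> lives at byte offset 2 * 4 = 8), instead of a vector load plus an extractps/movd XMM -> GPR transfer -- even when %v has other vector uses.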
@@ -43231,6 +43231,35 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
    }
  }

  // If this extract is from a loaded vector value and will be used as an
  // integer, that requires a potentially expensive XMM -> GPR transfer.
  // Additionally, if we can convert to a scalar integer load, that will likely
  // be folded into a subsequent integer op.
  // Note: Unlike the related fold for this in DAGCombiner, this is not limited
  //       to a single-use of the loaded vector. For the reasons above, we
  //       expect this to be profitable even if it creates an extra load.
  bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
    return Use->getOpcode() == ISD::STORE ||
           Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
           Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
  });
  auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
  if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
      SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
      !LikelyUsedAsVector) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SDValue NewPtr =
        TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
    unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
    MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
    Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
    SDValue Load =
        DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
                    LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(LoadVec, Load);
    return Load;
  }

  return SDValue();
}

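To make the address arithmetic above concrete (a worked example, not part of the patch): extracting i32 element 2 of a loaded <4 x i32> gives PtrOff = 32 * 2 / 8 = 8, MPI becomes the vector's pointer info offset by 8 bytes, and commonAlignment() reduces a 16-byte vector alignment to 8 for that offset. The makeEquivalentMemoryOrdering() call then chains the new scalar load like the original vector load so it cannot be reordered across a later store -- the miscompile that sank the first attempt.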
@@ -10,13 +10,13 @@
define <4 x i32> @test(<4 x i32>* %p) {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: extractps $2, %xmm0, %eax
; CHECK-NEXT: cmpl $3, %eax
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: cmpl $3, 8(%rdi)
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: retq
%v = load <4 x i32>, <4 x i32>* %p
%e = extractelement <4 x i32> %v, i32 2

@@ -148,18 +148,12 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
; NODQ-LABEL: slto4f32_mem:
; NODQ: # %bb.0:
; NODQ-NEXT: vmovdqu (%rdi), %xmm0
; NODQ-NEXT: vmovdqu 16(%rdi), %xmm1
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; NODQ-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; NODQ-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; NODQ-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; NODQ-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; NODQ-NEXT: retq
;

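Note what the fold buys in this NODQ diff: instead of vmovdqu vector loads followed by vpextrq/vmovq XMM -> GPR transfers feeding vcvtsi2ss, each i64 element is converted straight from memory with vcvtsi2ssq at offsets (%rdi), 8(%rdi), 16(%rdi), and 24(%rdi). This is the motivating pattern from #50310.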
@@ -542,10 +542,8 @@ define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %zmm0, %k0
; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrd $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: addl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = icmp slt <64 x i8> %a0, zeroinitializer

@@ -301,33 +301,35 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
ret void
}

; A scalar load is favored over a XMM->GPR register transfer in this example.

define i32 @multi_use_load_scalarization(<4 x i32>* %p) nounwind {
; X32-SSE2-LABEL: multi_use_load_scalarization:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl (%ecx), %eax
; X32-SSE2-NEXT: movdqu (%ecx), %xmm0
; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; X32-SSE2-NEXT: movd %xmm0, %eax
; X32-SSE2-NEXT: psubd %xmm1, %xmm0
; X32-SSE2-NEXT: movdqa %xmm0, (%ecx)
; X32-SSE2-NEXT: retl
;
; X64-SSSE3-LABEL: multi_use_load_scalarization:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: movl (%rdi), %eax
; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0
; X64-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSSE3-NEXT: movd %xmm0, %eax
; X64-SSSE3-NEXT: psubd %xmm1, %xmm0
; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi)
; X64-SSSE3-NEXT: retq
;
; X64-AVX-LABEL: multi_use_load_scalarization:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl (%rdi), %eax
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1
; X64-AVX-NEXT: vmovdqa %xmm1, (%rdi)
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovdqa %xmm0, (%rdi)
; X64-AVX-NEXT: retq
%v = load <4 x i32>, <4 x i32>* %p, align 1
%v1 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>

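In multi_use_load_scalarization above, the loaded vector has both a vector use (the psubd feeding the store) and an integer-extract use, and the combine still fires: the new output reads element 0 with a scalar "movl (%rdi), %eax" alongside the vector load, rather than a movd transfer out of %xmm0. This is the deliberate difference from the single-use DAGCombiner fold.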
@@ -336,6 +338,12 @@ define i32 @multi_use_load_scalarization(<4 x i32>* %p) nounwind {
ret i32 %r
}

; This test is reduced from a C source example that showed a miscompile:
; https://github.com/llvm/llvm-project/issues/53695
; The scalarized loads from 'zero' in the AVX asm must occur before
; the vector store to 'zero' overwrites the values.
; If compiled to a binary, this test should return 0 if correct.

@n1 = local_unnamed_addr global <8 x i32> <i32 0, i32 42, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0>, align 32
@zero = internal unnamed_addr global <8 x i32> zeroinitializer, align 32

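The X64-AVX1/X64-AVX2 output below shows the fix in action: the scalarized loads "movl zero+4(%rip), %ecx" and "movl zero+8(%rip), %eax" are emitted before the "vmovaps %ymm0, zero(%rip)" store that overwrites the global -- exactly the ordering the makeEquivalentMemoryOrdering() call in the combine preserves.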
@@ -419,21 +427,21 @@ define i32 @main() nounwind {
; X64-AVX1-NEXT: subq $64, %rsp
; X64-AVX1-NEXT: movq n1@GOTPCREL(%rip), %rax
; X64-AVX1-NEXT: vmovaps (%rax), %ymm0
; X64-AVX1-NEXT: vmovaps zero(%rip), %xmm1
; X64-AVX1-NEXT: movl zero+4(%rip), %ecx
; X64-AVX1-NEXT: movl zero+8(%rip), %eax
; X64-AVX1-NEXT: vmovaps %ymm0, zero(%rip)
; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; X64-AVX1-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX1-NEXT: vmovaps (%rsp), %ymm0
; X64-AVX1-NEXT: vextractps $2, %xmm1, %eax
; X64-AVX1-NEXT: vextractps $2, %xmm0, %ecx
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: movl %eax, %ecx
; X64-AVX1-NEXT: vextractps $1, %xmm1, %eax
; X64-AVX1-NEXT: vextractps $1, %xmm0, %esi
; X64-AVX1-NEXT: vextractps $2, %xmm0, %esi
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %esi
; X64-AVX1-NEXT: addl %ecx, %eax
; X64-AVX1-NEXT: movl %eax, %esi
; X64-AVX1-NEXT: vextractps $1, %xmm0, %edi
; X64-AVX1-NEXT: movl %ecx, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %edi
; X64-AVX1-NEXT: addl %esi, %eax
; X64-AVX1-NEXT: movq %rbp, %rsp
; X64-AVX1-NEXT: popq %rbp
; X64-AVX1-NEXT: vzeroupper

@@ -447,21 +455,21 @@ define i32 @main() nounwind {
; X64-AVX2-NEXT: subq $64, %rsp
; X64-AVX2-NEXT: movq n1@GOTPCREL(%rip), %rax
; X64-AVX2-NEXT: vmovaps (%rax), %ymm0
; X64-AVX2-NEXT: vmovaps zero(%rip), %xmm1
; X64-AVX2-NEXT: movl zero+4(%rip), %ecx
; X64-AVX2-NEXT: movl zero+8(%rip), %eax
; X64-AVX2-NEXT: vmovaps %ymm0, zero(%rip)
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
; X64-AVX2-NEXT: vmovaps (%rsp), %ymm0
; X64-AVX2-NEXT: vextractps $2, %xmm1, %eax
; X64-AVX2-NEXT: vextractps $2, %xmm0, %ecx
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %ecx
; X64-AVX2-NEXT: movl %eax, %ecx
; X64-AVX2-NEXT: vextractps $1, %xmm1, %eax
; X64-AVX2-NEXT: vextractps $1, %xmm0, %esi
; X64-AVX2-NEXT: vextractps $2, %xmm0, %esi
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %esi
; X64-AVX2-NEXT: addl %ecx, %eax
; X64-AVX2-NEXT: movl %eax, %esi
; X64-AVX2-NEXT: vextractps $1, %xmm0, %edi
; X64-AVX2-NEXT: movl %ecx, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %edi
; X64-AVX2-NEXT: addl %esi, %eax
; X64-AVX2-NEXT: movq %rbp, %rsp
; X64-AVX2-NEXT: popq %rbp
; X64-AVX2-NEXT: vzeroupper

@@ -161,46 +161,46 @@ define <16 x i32> @PR42819(<8 x i32>* %a0) {
define void @PR42833() {
; SSE2-LABEL: PR42833:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa c+144(%rip), %xmm1
; SSE2-NEXT: movdqa c+128(%rip), %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: addl b(%rip), %eax
; SSE2-NEXT: movl b(%rip), %eax
; SSE2-NEXT: movdqa c+144(%rip), %xmm0
; SSE2-NEXT: movdqa c+128(%rip), %xmm1
; SSE2-NEXT: addl c+128(%rip), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: paddd %xmm1, %xmm3
; SSE2-NEXT: movdqa d+144(%rip), %xmm4
; SSE2-NEXT: psubd %xmm1, %xmm4
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: paddd %xmm0, %xmm5
; SSE2-NEXT: psubd %xmm0, %xmm4
; SSE2-NEXT: paddd %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: paddd %xmm1, %xmm5
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
; SSE2-NEXT: movdqa %xmm1, c+144(%rip)
; SSE2-NEXT: movdqa %xmm0, c+144(%rip)
; SSE2-NEXT: movaps %xmm5, c+128(%rip)
; SSE2-NEXT: movdqa c+160(%rip), %xmm1
; SSE2-NEXT: movdqa c+160(%rip), %xmm0
; SSE2-NEXT: movdqa c+176(%rip), %xmm3
; SSE2-NEXT: movdqa d+160(%rip), %xmm5
; SSE2-NEXT: movdqa d+176(%rip), %xmm6
; SSE2-NEXT: movdqa d+128(%rip), %xmm7
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT: psubd %xmm0, %xmm7
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
; SSE2-NEXT: psubd %xmm1, %xmm7
; SSE2-NEXT: psubd %xmm3, %xmm6
; SSE2-NEXT: psubd %xmm1, %xmm5
; SSE2-NEXT: psubd %xmm0, %xmm5
; SSE2-NEXT: movdqa %xmm5, d+160(%rip)
; SSE2-NEXT: movdqa %xmm6, d+176(%rip)
; SSE2-NEXT: movdqa %xmm4, d+144(%rip)
; SSE2-NEXT: movdqa %xmm7, d+128(%rip)
; SSE2-NEXT: paddd %xmm3, %xmm3
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm1, c+160(%rip)
; SSE2-NEXT: paddd %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm0, c+160(%rip)
; SSE2-NEXT: movdqa %xmm3, c+176(%rip)
; SSE2-NEXT: retq
;
; SSE42-LABEL: PR42833:
; SSE42: # %bb.0:
; SSE42-NEXT: movl b(%rip), %eax
; SSE42-NEXT: movdqa c+144(%rip), %xmm0
; SSE42-NEXT: movdqa c+128(%rip), %xmm1
; SSE42-NEXT: movd %xmm1, %eax
; SSE42-NEXT: addl b(%rip), %eax
; SSE42-NEXT: addl c+128(%rip), %eax
; SSE42-NEXT: movd %eax, %xmm2
; SSE42-NEXT: paddd %xmm1, %xmm2
; SSE42-NEXT: movdqa d+144(%rip), %xmm3

@@ -232,20 +232,20 @@ define void @PR42833() {
;
; AVX1-LABEL: PR42833:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa c+128(%rip), %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: addl b(%rip), %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; AVX1-NEXT: movl b(%rip), %eax
; AVX1-NEXT: addl c+128(%rip), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vmovdqa c+128(%rip), %xmm1
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpaddd %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa c+144(%rip), %xmm3
; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vmovdqa d+144(%rip), %xmm2
; AVX1-NEXT: vpsubd c+144(%rip), %xmm2, %xmm2
; AVX1-NEXT: vmovups %ymm1, c+128(%rip)
; AVX1-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; AVX1-NEXT: vmovups %ymm0, c+128(%rip)
; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm0
; AVX1-NEXT: vmovdqa d+128(%rip), %xmm1
; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovdqa d+176(%rip), %xmm1

@@ -314,20 +314,20 @@ define void @PR42833() {
;
; XOP-LABEL: PR42833:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa c+128(%rip), %xmm0
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: addl b(%rip), %eax
; XOP-NEXT: vmovd %eax, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; XOP-NEXT: movl b(%rip), %eax
; XOP-NEXT: addl c+128(%rip), %eax
; XOP-NEXT: vmovd %eax, %xmm0
; XOP-NEXT: vmovdqa c+128(%rip), %xmm1
; XOP-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; XOP-NEXT: vpaddd %xmm1, %xmm1, %xmm2
; XOP-NEXT: vmovdqa c+144(%rip), %xmm3
; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT: vmovdqa d+144(%rip), %xmm2
; XOP-NEXT: vpsubd c+144(%rip), %xmm2, %xmm2
; XOP-NEXT: vmovups %ymm1, c+128(%rip)
; XOP-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; XOP-NEXT: vmovups %ymm0, c+128(%rip)
; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm0
; XOP-NEXT: vmovdqa d+128(%rip), %xmm1
; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; XOP-NEXT: vmovdqa d+176(%rip), %xmm1

@@ -76,28 +76,23 @@ define i1 @parseHeaders2_scalar_and(i64 * %ptr) nounwind {
; SSE2-LABEL: parseHeaders2_scalar_and:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: testq %rcx, %rax
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, (%rdi)
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: parseHeaders2_scalar_and:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqu (%rdi), %xmm0
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: pextrq $1, %xmm0, %rcx
; SSE41-NEXT: testq %rcx, %rax
; SSE41-NEXT: movq (%rdi), %rax
; SSE41-NEXT: testq %rax, 8(%rdi)
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: parseHeaders2_scalar_and:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqu (%rdi), %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
; AVX-NEXT: testq %rcx, %rax
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: testq %rax, 8(%rdi)
; AVX-NEXT: sete %al
; AVX-NEXT: retq
%vptr = bitcast i64 * %ptr to <2 x i64> *

@@ -403,32 +403,28 @@ define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle .LBB12_3
; CHECK-NEXT: # %bb.1: # %bb.nph
; CHECK-NEXT: movl %edx, %r9d
; CHECK-NEXT: movl %edx, %r10d
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB12_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movdqa (%rdi,%rcx), %xmm0
; CHECK-NEXT: movdqa (%rsi,%rcx), %xmm1
; CHECK-NEXT: pextrd $1, %xmm0, %eax
; CHECK-NEXT: pextrd $1, %xmm1, %r8d
; CHECK-NEXT: movl (%rdi,%rcx), %r8d
; CHECK-NEXT: movl 4(%rdi,%rcx), %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %r8d
; CHECK-NEXT: movl %eax, %r8d
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movd %xmm1, %r10d
; CHECK-NEXT: idivl 4(%rsi,%rcx)
; CHECK-NEXT: movl %eax, %r9d
; CHECK-NEXT: movl %r8d, %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %r10d
; CHECK-NEXT: movd %eax, %xmm2
; CHECK-NEXT: pinsrd $1, %r8d, %xmm2
; CHECK-NEXT: pextrd $2, %xmm0, %eax
; CHECK-NEXT: pextrd $2, %xmm1, %r8d
; CHECK-NEXT: idivl (%rsi,%rcx)
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: pinsrd $1, %r9d, %xmm0
; CHECK-NEXT: movl 8(%rdi,%rcx), %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %r8d
; CHECK-NEXT: idivl 8(%rsi,%rcx)
; CHECK-NEXT: movl %eax, 8(%rdi,%rcx)
; CHECK-NEXT: movq %xmm2, (%rdi,%rcx)
; CHECK-NEXT: movq %xmm0, (%rdi,%rcx)
; CHECK-NEXT: addq $16, %rcx
; CHECK-NEXT: decl %r9d
; CHECK-NEXT: decl %r10d
; CHECK-NEXT: jne .LBB12_2
; CHECK-NEXT: .LBB12_3: # %for.end
; CHECK-NEXT: retq

@@ -2072,10 +2072,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: divl %ecx
; X86-SSE-NEXT: movd %edx, %xmm4
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: movd %xmm1, %ecx
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: divl 16(%esi)
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; X86-SSE-NEXT: movd %xmm2, %eax

@@ -2086,10 +2086,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movd %edx, %xmm1
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; X86-SSE-NEXT: movd %xmm0, %ecx
; X86-SSE-NEXT: movl %edi, %eax
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %ecx
; X86-SSE-NEXT: divl (%esi)
; X86-SSE-NEXT: movd %edx, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; X86-SSE-NEXT: movd %xmm2, %ecx

@@ -2115,8 +2114,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X86-SSE-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: divl 32(%ecx)
; X86-SSE-NEXT: divl 32(%esi)
; X86-SSE-NEXT: movdqa {{.*#+}} xmm0 = [8199,8199,8199,8199]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; X86-SSE-NEXT: pmuludq %xmm0, %xmm1

@@ -2151,53 +2149,43 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vmovd %xmm1, %eax
; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X86-AVX1-NEXT: vmovd %xmm2, %eax
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl 32(%ecx)
; X86-AVX1-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-AVX1-NEXT: vpextrd $3, %xmm2, %eax
; X86-AVX1-NEXT: vmovdqa (%ecx), %xmm1
; X86-AVX1-NEXT: vmovdqa 16(%ecx), %xmm3
; X86-AVX1-NEXT: vpextrd $3, %xmm3, %ecx
; X86-AVX1-NEXT: vpextrd $3, %xmm1, %eax
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
; X86-AVX1-NEXT: divl 28(%ecx)
; X86-AVX1-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-AVX1-NEXT: vpextrd $2, %xmm2, %eax
; X86-AVX1-NEXT: vpextrd $2, %xmm3, %ecx
; X86-AVX1-NEXT: vpextrd $2, %xmm1, %eax
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
; X86-AVX1-NEXT: divl 24(%ecx)
; X86-AVX1-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-AVX1-NEXT: vpextrd $1, %xmm2, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm3, %ecx
; X86-AVX1-NEXT: vpextrd $1, %xmm1, %eax
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
; X86-AVX1-NEXT: divl 20(%ecx)
; X86-AVX1-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-AVX1-NEXT: vmovd %xmm2, %eax
; X86-AVX1-NEXT: vmovd %xmm3, %ecx
; X86-AVX1-NEXT: vmovd %xmm1, %eax
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
; X86-AVX1-NEXT: divl 16(%ecx)
; X86-AVX1-NEXT: movl %edx, %ebp
; X86-AVX1-NEXT: vpextrd $3, %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $3, %xmm1, %ecx
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
; X86-AVX1-NEXT: divl 12(%ecx)
; X86-AVX1-NEXT: movl %edx, %ebx
; X86-AVX1-NEXT: vpextrd $2, %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $2, %xmm1, %esi
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %esi
; X86-AVX1-NEXT: divl 8(%ecx)
; X86-AVX1-NEXT: movl %edx, %esi
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm1, %edi
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %edi
; X86-AVX1-NEXT: divl 4(%ecx)
; X86-AVX1-NEXT: movl %edx, %edi
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vmovd %xmm1, %ecx
; X86-AVX1-NEXT: xorl %edx, %edx
; X86-AVX1-NEXT: divl %ecx
; X86-AVX1-NEXT: divl (%ecx)
; X86-AVX1-NEXT: vmovd %edx, %xmm0
; X86-AVX1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
; X86-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0

@@ -2223,58 +2211,47 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
;
; X86-AVX2-LABEL: PR34947:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: pushl %edi
; X86-AVX2-NEXT: pushl %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X86-AVX2-NEXT: vmovdqa (%esi), %xmm2
; X86-AVX2-NEXT: vmovdqa 16(%esi), %xmm3
; X86-AVX2-NEXT: vpextrd $1, %xmm3, %ecx
; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
; X86-AVX2-NEXT: vpextrd $1, %xmm4, %eax
; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; X86-AVX2-NEXT: vpextrd $1, %xmm2, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %ecx
; X86-AVX2-NEXT: divl 20(%esi)
; X86-AVX2-NEXT: movl %edx, %ecx
; X86-AVX2-NEXT: vmovd %xmm3, %edi
; X86-AVX2-NEXT: vmovd %xmm4, %eax
; X86-AVX2-NEXT: vmovd %xmm2, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %edi
; X86-AVX2-NEXT: vmovd %edx, %xmm5
; X86-AVX2-NEXT: vpinsrd $1, %ecx, %xmm5, %xmm5
; X86-AVX2-NEXT: vpextrd $2, %xmm3, %ecx
; X86-AVX2-NEXT: vpextrd $2, %xmm4, %eax
; X86-AVX2-NEXT: divl 16(%esi)
; X86-AVX2-NEXT: vmovd %edx, %xmm3
; X86-AVX2-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; X86-AVX2-NEXT: vpextrd $2, %xmm2, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %ecx
; X86-AVX2-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
; X86-AVX2-NEXT: vpextrd $3, %xmm3, %ecx
; X86-AVX2-NEXT: vpextrd $3, %xmm4, %eax
; X86-AVX2-NEXT: divl 24(%esi)
; X86-AVX2-NEXT: vpinsrd $2, %edx, %xmm3, %xmm3
; X86-AVX2-NEXT: vpextrd $3, %xmm2, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %ecx
; X86-AVX2-NEXT: vpinsrd $3, %edx, %xmm5, %xmm3
; X86-AVX2-NEXT: vpextrd $1, %xmm2, %ecx
; X86-AVX2-NEXT: divl 28(%esi)
; X86-AVX2-NEXT: vpinsrd $3, %edx, %xmm3, %xmm2
; X86-AVX2-NEXT: vpextrd $1, %xmm1, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %ecx
; X86-AVX2-NEXT: divl 4(%esi)
; X86-AVX2-NEXT: movl %edx, %ecx
; X86-AVX2-NEXT: vmovd %xmm2, %edi
; X86-AVX2-NEXT: vmovd %xmm1, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %edi
; X86-AVX2-NEXT: vmovd %edx, %xmm4
; X86-AVX2-NEXT: vpinsrd $1, %ecx, %xmm4, %xmm4
; X86-AVX2-NEXT: vpextrd $2, %xmm2, %ecx
; X86-AVX2-NEXT: divl (%esi)
; X86-AVX2-NEXT: vmovd %edx, %xmm3
; X86-AVX2-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; X86-AVX2-NEXT: vpextrd $2, %xmm1, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %ecx
; X86-AVX2-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
; X86-AVX2-NEXT: vpextrd $3, %xmm2, %ecx
; X86-AVX2-NEXT: divl 8(%esi)
; X86-AVX2-NEXT: vpinsrd $2, %edx, %xmm3, %xmm3
; X86-AVX2-NEXT: vpextrd $3, %xmm1, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl %ecx
; X86-AVX2-NEXT: vpinsrd $3, %edx, %xmm4, %xmm1
; X86-AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; X86-AVX2-NEXT: divl 12(%esi)
; X86-AVX2-NEXT: vpinsrd $3, %edx, %xmm3, %xmm1
; X86-AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl 32(%esi)

@@ -2284,7 +2261,6 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX2-NEXT: movl %eax, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax)
; X86-AVX2-NEXT: popl %esi
; X86-AVX2-NEXT: popl %edi
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;

@@ -2317,10 +2293,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: divl %edi
; X64-SSE-NEXT: movd %edx, %xmm4
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X64-SSE-NEXT: movd %xmm1, %edi
; X64-SSE-NEXT: movl %r9d, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %edi
; X64-SSE-NEXT: divl 16(%rsi)
; X64-SSE-NEXT: movd %edx, %xmm3
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; X64-SSE-NEXT: movd %xmm2, %eax

@@ -2331,10 +2306,9 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movd %edx, %xmm1
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; X64-SSE-NEXT: movd %xmm0, %edi
; X64-SSE-NEXT: movl %r10d, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %edi
; X64-SSE-NEXT: divl (%rsi)
; X64-SSE-NEXT: movd %edx, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; X64-SSE-NEXT: movd %xmm2, %edi

@@ -2385,60 +2359,50 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX1-NEXT: pushq %rbp
; X64-AVX1-NEXT: pushq %rbx
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vmovd %xmm1, %eax
; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-AVX1-NEXT: vmovd %xmm2, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl 32(%rsi)
; X64-AVX1-NEXT: movl %edx, %r8d
; X64-AVX1-NEXT: vpextrd $3, %xmm2, %eax
; X64-AVX1-NEXT: vmovdqa (%rsi), %xmm1
; X64-AVX1-NEXT: vmovdqa 16(%rsi), %xmm3
; X64-AVX1-NEXT: vpextrd $3, %xmm3, %ecx
; X64-AVX1-NEXT: vpextrd $3, %xmm1, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: divl 28(%rsi)
; X64-AVX1-NEXT: movl %edx, %r9d
; X64-AVX1-NEXT: vpextrd $2, %xmm2, %eax
; X64-AVX1-NEXT: vpextrd $2, %xmm3, %ecx
; X64-AVX1-NEXT: vpextrd $2, %xmm1, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: divl 24(%rsi)
; X64-AVX1-NEXT: movl %edx, %r10d
; X64-AVX1-NEXT: vpextrd $1, %xmm2, %eax
; X64-AVX1-NEXT: vpextrd $1, %xmm3, %ecx
; X64-AVX1-NEXT: vpextrd $1, %xmm1, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: divl 20(%rsi)
; X64-AVX1-NEXT: movl %edx, %r11d
; X64-AVX1-NEXT: vmovd %xmm2, %eax
; X64-AVX1-NEXT: vmovd %xmm3, %ecx
; X64-AVX1-NEXT: vmovd %xmm1, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: movl %edx, %esi
; X64-AVX1-NEXT: divl 16(%rsi)
; X64-AVX1-NEXT: movl %edx, %ecx
; X64-AVX1-NEXT: vpextrd $3, %xmm0, %eax
; X64-AVX1-NEXT: vpextrd $3, %xmm1, %ecx
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: divl 12(%rsi)
; X64-AVX1-NEXT: movl %edx, %edi
; X64-AVX1-NEXT: vpextrd $2, %xmm0, %eax
; X64-AVX1-NEXT: vpextrd $2, %xmm1, %ecx
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ecx
; X64-AVX1-NEXT: movl %edx, %ecx
; X64-AVX1-NEXT: vpextrd $1, %xmm0, %eax
; X64-AVX1-NEXT: vpextrd $1, %xmm1, %ebx
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ebx
; X64-AVX1-NEXT: divl 8(%rsi)
; X64-AVX1-NEXT: movl %edx, %ebx
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: vmovd %xmm1, %ebp
; X64-AVX1-NEXT: vpextrd $1, %xmm0, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl %ebp
; X64-AVX1-NEXT: divl 4(%rsi)
; X64-AVX1-NEXT: movl %edx, %ebp
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorl %edx, %edx
; X64-AVX1-NEXT: divl (%rsi)
; X64-AVX1-NEXT: vmovd %edx, %xmm0
; X64-AVX1-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0
; X64-AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-AVX1-NEXT: vpinsrd $1, %ebp, %xmm0, %xmm0
; X64-AVX1-NEXT: vpinsrd $2, %ebx, %xmm0, %xmm0
; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %esi, %xmm2
; X64-AVX1-NEXT: vmovd %ecx, %xmm2
; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2

@@ -2455,52 +2419,42 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-AVX2-NEXT: vmovdqa (%rsi), %xmm2
; X64-AVX2-NEXT: vmovdqa 16(%rsi), %xmm3
; X64-AVX2-NEXT: vpextrd $1, %xmm3, %ecx
; X64-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
; X64-AVX2-NEXT: vpextrd $1, %xmm4, %eax
; X64-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; X64-AVX2-NEXT: vpextrd $1, %xmm2, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %ecx
; X64-AVX2-NEXT: divl 20(%rsi)
; X64-AVX2-NEXT: movl %edx, %ecx
; X64-AVX2-NEXT: vmovd %xmm3, %edi
; X64-AVX2-NEXT: vmovd %xmm4, %eax
; X64-AVX2-NEXT: vmovd %xmm2, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %edi
; X64-AVX2-NEXT: vmovd %edx, %xmm5
; X64-AVX2-NEXT: vpinsrd $1, %ecx, %xmm5, %xmm5
; X64-AVX2-NEXT: vpextrd $2, %xmm3, %ecx
; X64-AVX2-NEXT: vpextrd $2, %xmm4, %eax
; X64-AVX2-NEXT: divl 16(%rsi)
; X64-AVX2-NEXT: vmovd %edx, %xmm3
; X64-AVX2-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; X64-AVX2-NEXT: vpextrd $2, %xmm2, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %ecx
; X64-AVX2-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
; X64-AVX2-NEXT: vpextrd $3, %xmm3, %ecx
; X64-AVX2-NEXT: vpextrd $3, %xmm4, %eax
; X64-AVX2-NEXT: divl 24(%rsi)
; X64-AVX2-NEXT: vpinsrd $2, %edx, %xmm3, %xmm3
; X64-AVX2-NEXT: vpextrd $3, %xmm2, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %ecx
; X64-AVX2-NEXT: vpinsrd $3, %edx, %xmm5, %xmm3
; X64-AVX2-NEXT: vpextrd $1, %xmm2, %ecx
; X64-AVX2-NEXT: divl 28(%rsi)
; X64-AVX2-NEXT: vpinsrd $3, %edx, %xmm3, %xmm2
; X64-AVX2-NEXT: vpextrd $1, %xmm1, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %ecx
; X64-AVX2-NEXT: divl 4(%rsi)
; X64-AVX2-NEXT: movl %edx, %ecx
; X64-AVX2-NEXT: vmovd %xmm2, %edi
; X64-AVX2-NEXT: vmovd %xmm1, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %edi
; X64-AVX2-NEXT: vmovd %edx, %xmm4
; X64-AVX2-NEXT: vpinsrd $1, %ecx, %xmm4, %xmm4
; X64-AVX2-NEXT: vpextrd $2, %xmm2, %ecx
; X64-AVX2-NEXT: divl (%rsi)
; X64-AVX2-NEXT: vmovd %edx, %xmm3
; X64-AVX2-NEXT: vpinsrd $1, %ecx, %xmm3, %xmm3
; X64-AVX2-NEXT: vpextrd $2, %xmm1, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %ecx
; X64-AVX2-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
; X64-AVX2-NEXT: vpextrd $3, %xmm2, %ecx
; X64-AVX2-NEXT: divl 8(%rsi)
; X64-AVX2-NEXT: vpinsrd $2, %edx, %xmm3, %xmm3
; X64-AVX2-NEXT: vpextrd $3, %xmm1, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl %ecx
; X64-AVX2-NEXT: vpinsrd $3, %edx, %xmm4, %xmm1
; X64-AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; X64-AVX2-NEXT: divl 12(%rsi)
; X64-AVX2-NEXT: vpinsrd $3, %edx, %xmm3, %xmm1
; X64-AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl 32(%rsi)

@@ -156,7 +156,7 @@ define <3 x i16> @h(<3 x i32> %a) nounwind {
; CHECK-WIN-LABEL: h:
; CHECK-WIN: # %bb.0:
; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0
; CHECK-WIN-NEXT: movd %xmm0, %eax
; CHECK-WIN-NEXT: movl (%rcx), %eax
; CHECK-WIN-NEXT: pextrw $2, %xmm0, %edx
; CHECK-WIN-NEXT: pextrw $4, %xmm0, %ecx
; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax

@@ -2895,8 +2895,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
; SSE2-LABEL: sitofp_load_2i64_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1

@@ -2906,43 +2905,30 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
;
; SSE41-LABEL: sitofp_load_2i64_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pextrq $1, %xmm0, %rax
; SSE41-NEXT: cvtsi2sd %rax, %xmm1
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-NEXT: cvtsi2sdq 8(%rdi), %xmm1
; SSE41-NEXT: cvtsi2sdq (%rdi), %xmm0
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_load_2i64_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vmovdqa (%rdi), %xmm0
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:

@@ -3092,16 +3078,14 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: movdqa 16(%rdi), %xmm2
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: cvtsi2sdq 16(%rdi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: xorps %xmm2, %xmm2

@@ -3111,72 +3095,46 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
;
; SSE41-LABEL: sitofp_load_4i64_to_4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
; SSE41-NEXT: pextrq $1, %xmm0, %rax
; SSE41-NEXT: cvtsi2sd %rax, %xmm2
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT: pextrq $1, %xmm1, %rax
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2sd %rax, %xmm2
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: cvtsi2sdq 8(%rdi), %xmm1
; SSE41-NEXT: cvtsi2sdq (%rdi), %xmm0
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: cvtsi2sdq 24(%rdi), %xmm2
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2sd %rax, %xmm1
; SSE41-NEXT: cvtsi2sdq 16(%rdi), %xmm1
; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_load_4i64_to_4f64:
; VEX: # %bb.0:
; VEX-NEXT: vmovapd (%rdi), %xmm0
; VEX-NEXT: vmovdqa 16(%rdi), %xmm1
; VEX-NEXT: vpextrq $1, %xmm1, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; VEX-NEXT: vmovq %xmm1, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; VEX-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovapd (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovapd (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64:

@@ -3881,16 +3839,14 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1

@@ -3901,72 +3857,47 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
;
; SSE41-LABEL: sitofp_load_4i64_to_4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
; SSE41-NEXT: pextrq $1, %xmm0, %rax
; SSE41-NEXT: cvtsi2ss %rax, %xmm2
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2ss %rax, %xmm2
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; SSE41-NEXT: pextrq $1, %xmm1, %rax
; SSE41-NEXT: cvtsi2ssq 8(%rdi), %xmm1
; SSE41-NEXT: cvtsi2ssq (%rdi), %xmm0
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-NEXT: cvtsi2ssq 16(%rdi), %xmm1
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ssq 24(%rdi), %xmm1
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_load_4i64_to_4f32:
; VEX: # %bb.0:
; VEX-NEXT: vmovdqa (%rdi), %xmm0
; VEX-NEXT: vmovdqa 16(%rdi), %xmm1
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; VEX-NEXT: vmovq %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; VEX-NEXT: vpextrq $1, %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_4i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: retq
;

@@ -4060,33 +3991,29 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-NEXT: movdqa 32(%rdi), %xmm2
; SSE2-NEXT: movdqa 48(%rdi), %xmm3
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE2-NEXT: movq %xmm3, %rax
; SSE2-NEXT: xorps %xmm4, %xmm4
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
; SSE2-NEXT: cvtsi2ssq 48(%rdi), %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: cvtsi2ssq 32(%rdi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: xorps %xmm2, %xmm2

@ -4097,132 +4024,82 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
|||
;
|
||||
; SSE41-LABEL: sitofp_load_8i64_to_8f32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa (%rdi), %xmm0
|
||||
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
|
||||
; SSE41-NEXT: movdqa 32(%rdi), %xmm2
; SSE41-NEXT: movdqa 48(%rdi), %xmm3
; SSE41-NEXT: pextrq $1, %xmm0, %rax
; SSE41-NEXT: cvtsi2ss %rax, %xmm4
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2,3]
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: xorps %xmm4, %xmm4
; SSE41-NEXT: cvtsi2ss %rax, %xmm4
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm4[0],xmm0[3]
; SSE41-NEXT: pextrq $1, %xmm1, %rax
; SSE41-NEXT: cvtsi2ssq 8(%rdi), %xmm1
; SSE41-NEXT: cvtsi2ssq (%rdi), %xmm0
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-NEXT: cvtsi2ssq 16(%rdi), %xmm1
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ssq 24(%rdi), %xmm1
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE41-NEXT: pextrq $1, %xmm2, %rax
; SSE41-NEXT: xorps %xmm4, %xmm4
; SSE41-NEXT: cvtsi2ss %rax, %xmm4
; SSE41-NEXT: movq %xmm2, %rax
; SSE41-NEXT: cvtsi2ssq 40(%rdi), %xmm2
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2,3]
; SSE41-NEXT: movq %xmm3, %rax
; SSE41-NEXT: cvtsi2ssq 32(%rdi), %xmm1
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2ss %rax, %xmm2
; SSE41-NEXT: cvtsi2ssq 48(%rdi), %xmm2
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; SSE41-NEXT: pextrq $1, %xmm3, %rax
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2ss %rax, %xmm2
; SSE41-NEXT: cvtsi2ssq 56(%rdi), %xmm2
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_load_8i64_to_8f32:
; VEX: # %bb.0:
; VEX-NEXT: vmovaps (%rdi), %xmm0
; VEX-NEXT: vmovdqa 16(%rdi), %xmm1
; VEX-NEXT: vmovdqa 32(%rdi), %xmm2
; VEX-NEXT: vmovdqa 48(%rdi), %xmm3
; VEX-NEXT: vpextrq $1, %xmm2, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; VEX-NEXT: vmovq %xmm2, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3]
; VEX-NEXT: vmovq %xmm3, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; VEX-NEXT: vpextrq $1, %xmm3, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; VEX-NEXT: vmovq %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
; VEX-NEXT: vpextrq $1, %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; VEX-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; VEX-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; VEX-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; VEX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX512F-NEXT: vpextrq $1, %xmm2, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; AVX512F-NEXT: vmovq %xmm2, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3]
; AVX512F-NEXT: vmovq %xmm3, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; AVX512F-NEXT: vpextrq $1, %xmm3, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; AVX512VL-NEXT: vmovq %xmm2, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3]
; AVX512VL-NEXT: vmovq %xmm3, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; AVX512VL-NEXT: vpextrq $1, %xmm3, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32:
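; A minimal IR sketch of the pattern the sitofp hunks above exercise (the
; function name is illustrative, not taken from the test file): the vector
; load has several extracted lanes, each used only as an integer feeding
; sitofp, so each pextrq/movq round trip through a GPR in the old checks can
; become a cvtsi2ssq with the scalar load folded into its memory operand.
define float @sitofp_extract_sketch(<2 x i64>* %p) {
  %v = load <2 x i64>, <2 x i64>* %p
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %f0 = sitofp i64 %e0 to float
  %f1 = sitofp i64 %e1 to float
  %r = fadd float %f0, %f1
  ret float %r
}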
@@ -4352,7 +4229,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE2-LABEL: uitofp_load_4i64_to_4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: movq 16(%rdi), %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB83_1
; SSE2-NEXT: # %bb.2:
@@ -4366,23 +4243,23 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB83_3:
; SSE2-NEXT: movdqa (%rdi), %xmm2
; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: testq %rcx, %rcx
; SSE2-NEXT: js .LBB83_4
; SSE2-NEXT: # %bb.5:
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
; SSE2-NEXT: cvtsi2ss %rcx, %xmm2
; SSE2-NEXT: jmp .LBB83_6
; SSE2-NEXT: .LBB83_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
; SSE2-NEXT: addss %xmm3, %xmm3
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: shrq %rdx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rdx, %rcx
; SSE2-NEXT: cvtsi2ss %rcx, %xmm2
; SSE2-NEXT: addss %xmm2, %xmm2
; SSE2-NEXT: .LBB83_6:
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: movdqa (%rdi), %xmm3
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB83_7
; SSE2-NEXT: # %bb.8:
@@ -4398,8 +4275,8 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB83_9:
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB83_10
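; In the SSE2 blocks above, the testq/js, shrq, andl $1, orq, cvtsi2ss, addss
; sequence is the usual unsigned i64 -> f32 expansion: a value with the sign
; bit set is halved with its low bit folded back in, converted as a signed
; integer, then doubled. Only the source of the i64 changes in these hunks:
; a movq reload from (%rdi)/16(%rdi) instead of a movq/pshufd out of the
; loaded vector register.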
@@ -4520,35 +4397,23 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
;
; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_load_4i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: retq
;
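; The unsigned tests follow the same shape; a sketch under the same caveat
; (names illustrative, not from the test file): with AVX512, each extracted
; lane that feeds uitofp becomes a vcvtusi2ssq with the load folded into its
; memory operand, as in the new checks above.
define float @uitofp_extract_sketch(<4 x i64>* %p) {
  %v = load <4 x i64>, <4 x i64>* %p
  %e = extractelement <4 x i64> %v, i32 1
  %f = uitofp i64 %e to float
  ret float %f
}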
@@ -4701,7 +4566,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE2-LABEL: uitofp_load_8i64_to_8f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: movq 16(%rdi), %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_1
; SSE2-NEXT: # %bb.2:
@@ -4715,23 +4580,23 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE2-NEXT: cvtsi2ss %rax, %xmm2
; SSE2-NEXT: addss %xmm2, %xmm2
; SSE2-NEXT: .LBB87_3:
; SSE2-NEXT: movdqa (%rdi), %xmm3
; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: testq %rcx, %rcx
; SSE2-NEXT: js .LBB87_4
; SSE2-NEXT: # %bb.5:
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: cvtsi2ss %rcx, %xmm1
; SSE2-NEXT: jmp .LBB87_6
; SSE2-NEXT: .LBB87_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: shrq %rdx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rdx, %rcx
; SSE2-NEXT: cvtsi2ss %rcx, %xmm1
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB87_6:
; SSE2-NEXT: movq %xmm3, %rax
; SSE2-NEXT: movdqa (%rdi), %xmm3
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_7
; SSE2-NEXT: # %bb.8:
@@ -4747,23 +4612,23 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB87_9:
; SSE2-NEXT: movdqa 48(%rdi), %xmm6
; SSE2-NEXT: movq 48(%rdi), %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; SSE2-NEXT: movq %xmm3, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: movq %xmm3, %rcx
; SSE2-NEXT: testq %rcx, %rcx
; SSE2-NEXT: js .LBB87_10
; SSE2-NEXT: # %bb.11:
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
; SSE2-NEXT: cvtsi2ss %rcx, %xmm4
; SSE2-NEXT: jmp .LBB87_12
; SSE2-NEXT: .LBB87_10:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: cvtsi2ss %rax, %xmm4
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: shrq %rdx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rdx, %rcx
; SSE2-NEXT: cvtsi2ss %rcx, %xmm4
; SSE2-NEXT: addss %xmm4, %xmm4
; SSE2-NEXT: .LBB87_12:
; SSE2-NEXT: movq %xmm6, %rax
; SSE2-NEXT: movdqa 48(%rdi), %xmm5
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_13
; SSE2-NEXT: # %bb.14:
@@ -4779,27 +4644,27 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE2-NEXT: cvtsi2ss %rax, %xmm3
; SSE2-NEXT: addss %xmm3, %xmm3
; SSE2-NEXT: .LBB87_15:
; SSE2-NEXT: movdqa 32(%rdi), %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3]
; SSE2-NEXT: movq %xmm6, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: movq 32(%rdi), %rax
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; SSE2-NEXT: movq %xmm5, %rcx
; SSE2-NEXT: testq %rcx, %rcx
; SSE2-NEXT: js .LBB87_16
; SSE2-NEXT: # %bb.17:
; SSE2-NEXT: xorps %xmm6, %xmm6
; SSE2-NEXT: cvtsi2ss %rax, %xmm6
; SSE2-NEXT: xorps %xmm5, %xmm5
; SSE2-NEXT: cvtsi2ss %rcx, %xmm5
; SSE2-NEXT: jmp .LBB87_18
; SSE2-NEXT: .LBB87_16:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: xorps %xmm6, %xmm6
; SSE2-NEXT: cvtsi2ss %rax, %xmm6
; SSE2-NEXT: addss %xmm6, %xmm6
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: shrq %rdx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: orq %rdx, %rcx
; SSE2-NEXT: xorps %xmm5, %xmm5
; SSE2-NEXT: cvtsi2ss %rcx, %xmm5
; SSE2-NEXT: addss %xmm5, %xmm5
; SSE2-NEXT: .LBB87_18:
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT: movq %xmm5, %rax
; SSE2-NEXT: movdqa 32(%rdi), %xmm4
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_19
; SSE2-NEXT: # %bb.20:
@@ -4816,8 +4681,8 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB87_21:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,2,3]
; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,2,3]
; SSE2-NEXT: movq %xmm2, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB87_22
@@ -5021,64 +4886,40 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
;
; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX512F-NEXT: vpextrq $1, %xmm2, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm4, %xmm4
; AVX512F-NEXT: vmovq %xmm2, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3]
; AVX512F-NEXT: vmovq %xmm3, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; AVX512F-NEXT: vpextrq $1, %xmm3, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm4, %xmm4
; AVX512VL-NEXT: vmovq %xmm2, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3]
; AVX512VL-NEXT: vmovq %xmm3, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; AVX512VL-NEXT: vpextrq $1, %xmm3, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32:
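; Net effect visible throughout these hunks (instructions quoted from the
; checks above): a two-instruction extract-then-convert such as
;   vpextrq $1, %xmm1, %rax
;   vcvtusi2ss %rax, %xmm5, %xmm1
; becomes a single conversion with a folded scalar load, e.g.
;   vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
; so the vpextrq/vmovq step out of the vector register disappears for each
; converted lane.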