forked from OSchip/llvm-project
[X86][AVX] combineExtractWithShuffle - combine extracts from 256/512-bit vector shuffles.
Element extraction is only legal from the lowest 128-bit subvector, so first extract the 128-bit subvector that contains the requested element; this lets us handle element extracts from 256/512-bit vectors.
This commit is contained in:
parent
6ab792b68d
commit
5211af4818
|
@ -40277,10 +40277,21 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
|
|||
|
||||
// We can only legally extract other elements from 128-bit vectors and in
|
||||
// certain circumstances, depending on SSE-level.
|
||||
// TODO: Investigate using extract_subvector for larger vectors.
|
||||
// TODO: Investigate float/double extraction if it will be just stored.
|
||||
auto GetLegalExtract = [&Subtarget, &DAG, &dl](SDValue Vec, EVT VecVT,
|
||||
unsigned Idx) {
|
||||
EVT VecSVT = VecVT.getScalarType();
|
||||
if ((VecVT.is256BitVector() || VecVT.is512BitVector()) &&
|
||||
(VecSVT == MVT::i8 || VecSVT == MVT::i16 || VecSVT == MVT::i32 ||
|
||||
VecSVT == MVT::i64)) {
|
||||
unsigned EltSizeInBits = VecSVT.getSizeInBits();
|
||||
unsigned NumEltsPerLane = 128 / EltSizeInBits;
|
||||
unsigned LaneOffset = (Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits;
|
||||
unsigned LaneIdx = LaneOffset / Vec.getScalarValueSizeInBits();
|
||||
VecVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumEltsPerLane);
|
||||
Vec = extract128BitVector(Vec, LaneIdx, DAG, dl);
|
||||
Idx &= (NumEltsPerLane - 1);
|
||||
}
|
||||
if ((VecVT == MVT::v4i32 || VecVT == MVT::v2i64) &&
|
||||
((Idx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VecVT.getScalarType(),
|
||||
|
|
|
@ -800,34 +800,32 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
|
|||
;
|
||||
; AVX1-64-LABEL: uitofp_v4i64_v4f64:
|
||||
; AVX1-64: # %bb.0:
|
||||
; AVX1-64-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX1-64-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-64-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; AVX1-64-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-64-NEXT: vpextrd $2, %xmm1, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
||||
; AVX1-64-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
|
||||
; AVX1-64-NEXT: vmovq %xmm2, %rax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
|
||||
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX1-64-NEXT: vextractps $2, %xmm0, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX1-64-NEXT: vmovq %xmm1, %rax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm1
|
||||
; AVX1-64-NEXT: vmovq %xmm0, %rax
|
||||
; AVX1-64-NEXT: movl %eax, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
|
||||
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
|
||||
; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
|
||||
; AVX1-64-NEXT: vpextrd $3, %xmm1, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
|
||||
; AVX1-64-NEXT: vpextrd $1, %xmm1, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
|
||||
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
||||
; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-64-NEXT: vextractps $3, %xmm2, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX1-64-NEXT: vextractps $1, %xmm2, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
|
||||
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; AVX1-64-NEXT: vpextrd $3, %xmm0, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
|
||||
; AVX1-64-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
|
||||
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
|
||||
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
||||
; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-64-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||
; AVX1-64-NEXT: retq
|
||||
;
|
||||
; AVX2-64-LABEL: uitofp_v4i64_v4f64:
|
||||
|
@ -836,31 +834,29 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
|
|||
; AVX2-64-NEXT: vextractps $3, %xmm1, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
||||
; AVX2-64-NEXT: vextractps $1, %xmm1, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
|
||||
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; AVX2-64-NEXT: vextractps $3, %xmm0, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
|
||||
; AVX2-64-NEXT: vextractps $1, %xmm0, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
|
||||
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX2-64-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4.294967296E+9,4.294967296E+9,4.294967296E+9,4.294967296E+9]
|
||||
; AVX2-64-NEXT: vmulpd %ymm2, %ymm1, %ymm1
|
||||
; AVX2-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-64-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
|
||||
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX2-64-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX2-64-NEXT: vmovq %xmm2, %rax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
|
||||
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX2-64-NEXT: vextractps $3, %xmm0, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX2-64-NEXT: vextractps $1, %xmm0, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
|
||||
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
|
||||
; AVX2-64-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
|
||||
; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm3 = [4.294967296E+9,4.294967296E+9,4.294967296E+9,4.294967296E+9]
|
||||
; AVX2-64-NEXT: vmulpd %ymm3, %ymm2, %ymm2
|
||||
; AVX2-64-NEXT: vextractps $2, %xmm1, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
|
||||
; AVX2-64-NEXT: vmovd %xmm1, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
|
||||
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
||||
; AVX2-64-NEXT: vextractps $2, %xmm0, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
|
||||
; AVX2-64-NEXT: vmovq %xmm0, %rax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
|
||||
; AVX2-64-NEXT: movl %eax, %eax
|
||||
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
|
||||
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
||||
; AVX2-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX2-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX2-64-NEXT: vaddpd %ymm0, %ymm2, %ymm0
|
||||
; AVX2-64-NEXT: retq
|
||||
;
|
||||
; AVX512F-64-LABEL: uitofp_v4i64_v4f64:
|
||||
|
|
|
@ -7541,34 +7541,32 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
|
|||
;
|
||||
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i64:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpextrd $2, %xmm1, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovd %xmm1, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vmovq %xmm2, %rax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX1-NEXT: vextractps $2, %xmm0, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX1-NEXT: vmovq %xmm1, %rax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm1
|
||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
||||
; AVX1-NEXT: movl %eax, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
|
||||
; AVX1-NEXT: vpextrd $3, %xmm1, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
|
||||
; AVX1-NEXT: vpextrd $1, %xmm1, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vextractps $3, %xmm2, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX1-NEXT: vextractps $1, %xmm2, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
|
||||
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
|
||||
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64:
|
||||
|
|
Loading…
Reference in New Issue