forked from OSchip/llvm-project
[X86][AVX2] Improve sign/zero extension on AVX2 targets
Split extensions of large vectors into 256-bit chunks — the equivalent of what we do on pre-AVX2 targets, where we split into 128-bit chunks. llvm-svn: 277939
This commit is contained in:
parent
28c889593a
commit
bc573ca1b8
|
@ -30475,11 +30475,9 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
|
||||||
: DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
|
: DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// On pre-AVX2 targets, split into 128-bit nodes of
|
auto SplitAndExtendInReg = [&](unsigned SplitSize) {
|
||||||
// ISD::*_EXTEND_VECTOR_INREG.
|
unsigned NumVecs = VT.getSizeInBits() / SplitSize;
|
||||||
if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128)) {
|
unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
|
||||||
unsigned NumVecs = VT.getSizeInBits() / 128;
|
|
||||||
unsigned NumSubElts = 128 / SVT.getSizeInBits();
|
|
||||||
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
|
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
|
||||||
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
|
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
|
||||||
|
|
||||||
|
@ -30487,14 +30485,24 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
|
||||||
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
|
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
|
||||||
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
|
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
|
||||||
DAG.getIntPtrConstant(Offset, DL));
|
DAG.getIntPtrConstant(Offset, DL));
|
||||||
SrcVec = ExtendVecSize(DL, SrcVec, 128);
|
SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
|
||||||
SrcVec = Opcode == ISD::SIGN_EXTEND
|
SrcVec = Opcode == ISD::SIGN_EXTEND
|
||||||
? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
|
? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
|
||||||
: DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
|
: DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
|
||||||
Opnds.push_back(SrcVec);
|
Opnds.push_back(SrcVec);
|
||||||
}
|
}
|
||||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
|
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
|
||||||
}
|
};
|
||||||
|
|
||||||
|
// On pre-AVX2 targets, split into 128-bit nodes of
|
||||||
|
// ISD::*_EXTEND_VECTOR_INREG.
|
||||||
|
if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
|
||||||
|
return SplitAndExtendInReg(128);
|
||||||
|
|
||||||
|
// On pre-AVX512 targets, split into 256-bit nodes of
|
||||||
|
// ISD::*_EXTEND_VECTOR_INREG.
|
||||||
|
if (!Subtarget.hasAVX512() && !(VT.getSizeInBits() % 256))
|
||||||
|
return SplitAndExtendInReg(256);
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,8 +167,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: sitofp_16i8_to_2f64:
|
; AVX2-LABEL: sitofp_16i8_to_2f64:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
|
|
||||||
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
@ -370,8 +369,7 @@ define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: sitofp_16i8_to_4f64:
|
; AVX2-LABEL: sitofp_16i8_to_4f64:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
|
|
||||||
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -627,8 +625,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: uitofp_16i8_to_2f64:
|
; AVX2-LABEL: uitofp_16i8_to_2f64:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
|
||||||
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
@ -909,8 +906,7 @@ define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: uitofp_16i8_to_4f64:
|
; AVX2-LABEL: uitofp_16i8_to_4f64:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
|
||||||
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1103,8 +1099,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: sitofp_16i8_to_4f32:
|
; AVX2-LABEL: sitofp_16i8_to_4f32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
|
|
||||||
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||||
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
@ -1315,8 +1310,7 @@ define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: sitofp_16i8_to_8f32:
|
; AVX2-LABEL: sitofp_16i8_to_8f32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
|
||||||
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
|
|
||||||
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1692,8 +1686,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: uitofp_16i8_to_4f32:
|
; AVX2-LABEL: uitofp_16i8_to_4f32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
|
||||||
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||||
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
@ -2089,8 +2082,7 @@ define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: uitofp_16i8_to_8f32:
|
; AVX2-LABEL: uitofp_16i8_to_8f32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
|
||||||
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
|
@ -407,15 +407,9 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: sext_16i8_to_8i64:
|
; AVX2-LABEL: sext_16i8_to_8i64:
|
||||||
; AVX2: # BB#0: # %entry
|
; AVX2: # BB#0: # %entry
|
||||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
; AVX2-NEXT: vpmovsxbq %xmm0, %ymm2
|
||||||
; AVX2-NEXT: vpslld $24, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpsrad $24, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm2
|
|
||||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
; AVX2-NEXT: vpmovsxbq %xmm0, %ymm1
|
||||||
; AVX2-NEXT: vpslld $24, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1
|
|
||||||
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
|
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue