forked from OSchip/llvm-project
[X86] Fold ZERO_EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0)
Helps avoid some unnecessary shift-by-splat amount extensions before shuffle combining gets limited by one-use checks.
This commit is contained in:
parent
bad1b7fbb0
commit
74b98ab1db
|
@ -53526,6 +53526,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
|
|||
unsigned Opcode = N->getOpcode();
|
||||
unsigned InOpcode = In.getOpcode();
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
SDLoc DL(N);
|
||||
|
||||
// Try to merge vector loads and extend_inreg to an extload.
|
||||
if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
|
||||
|
@ -53538,10 +53539,9 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
|
|||
: ISD::ZEXTLOAD;
|
||||
EVT MemVT = VT.changeVectorElementType(SVT);
|
||||
if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
|
||||
SDValue Load =
|
||||
DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
|
||||
Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
|
||||
Ld->getMemOperand()->getFlags());
|
||||
SDValue Load = DAG.getExtLoad(
|
||||
Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
|
||||
MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
|
||||
return Load;
|
||||
}
|
||||
|
@ -53550,7 +53550,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
|
|||
|
||||
// Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
|
||||
if (Opcode == InOpcode)
|
||||
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
|
||||
return DAG.getNode(Opcode, DL, VT, In.getOperand(0));
|
||||
|
||||
// Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
|
||||
// -> EXTEND_VECTOR_INREG(X).
|
||||
|
@ -53559,7 +53559,21 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
|
|||
In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
|
||||
In.getOperand(0).getOperand(0).getValueSizeInBits() ==
|
||||
In.getValueSizeInBits())
|
||||
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
|
||||
return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));
|
||||
|
||||
// Fold ZERO_EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0).
|
||||
// TODO: Move to DAGCombine?
|
||||
if (!DCI.isBeforeLegalizeOps() && Opcode == ISD::ZERO_EXTEND_VECTOR_INREG &&
|
||||
In.getOpcode() == ISD::BUILD_VECTOR && In.hasOneUse() &&
|
||||
In.getValueSizeInBits() == VT.getSizeInBits()) {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits();
|
||||
EVT EltVT = In.getOperand(0).getValueType();
|
||||
SmallVector<SDValue> Elts(Scale * NumElts, DAG.getConstant(0, DL, EltVT));
|
||||
for (unsigned I = 0; I != NumElts; ++I)
|
||||
Elts[I * Scale] = In.getOperand(I);
|
||||
return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts));
|
||||
}
|
||||
|
||||
// Attempt to combine as a shuffle.
|
||||
// TODO: General ZERO_EXTEND_VECTOR_INREG support.
|
||||
|
|
|
@ -1778,31 +1778,19 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
|
|||
}
|
||||
|
||||
define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
|
||||
; SSE2-LABEL: PR52719:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movd %edi, %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; SSE2-NEXT: psrlq %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrlq %xmm1, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE2-NEXT: psubq %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: PR52719:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movd %edi, %xmm1
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; SSE41-NEXT: psrlq %xmm1, %xmm2
|
||||
; SSE41-NEXT: psrlq %xmm1, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE41-NEXT: psubq %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: PR52719:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movd %edi, %xmm1
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; SSE-NEXT: psrlq %xmm1, %xmm2
|
||||
; SSE-NEXT: psrlq %xmm1, %xmm0
|
||||
; SSE-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE-NEXT: psubq %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: PR52719:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm1
|
||||
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
|
||||
|
@ -1813,8 +1801,7 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
|
|||
; XOPAVX1-LABEL: PR52719:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vmovd %edi, %xmm1
|
||||
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
|
||||
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
|
||||
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
|
||||
|
@ -1823,8 +1810,7 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
|
|||
; XOPAVX2-LABEL: PR52719:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vmovd %edi, %xmm1
|
||||
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
|
||||
; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0
|
||||
|
@ -1834,7 +1820,6 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512-NEXT: vmovd %edi, %xmm1
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
|
@ -1843,7 +1828,6 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
|
|||
; AVX512VL-LABEL: PR52719:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovd %edi, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -2148,8 +2148,6 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
|
|||
; AVX2-LABEL: PR52719:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovd %edi, %xmm1
|
||||
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
|
||||
|
@ -2175,8 +2173,6 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
|
|||
; XOPAVX2-LABEL: PR52719:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vmovd %edi, %xmm1
|
||||
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
|
||||
; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
|
||||
|
@ -2188,16 +2184,13 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; AVX512-NEXT: vmovd %edi, %xmm1
|
||||
; AVX512-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: PR52719:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512VL-NEXT: vmovd %edi, %xmm1
|
||||
; AVX512VL-NEXT: vpsraq %xmm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -501,8 +501,6 @@ define <8 x i64> @PR52719(<8 x i64> %a0, i32 %a1) {
|
|||
; ALL-LABEL: PR52719:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovd %edi, %xmm1
|
||||
; ALL-NEXT: vpbroadcastd %xmm1, %xmm1
|
||||
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%vec = insertelement <8 x i32> poison, i32 %a1, i64 0
|
||||
|
|
Loading…
Reference in New Issue