forked from OSchip/llvm-project
[X86][AVX] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86ISD::VPERMIL2
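If the elements in the upper lanes of the result are not demanded, reduce the width of the X86ISD::VPERMIL2 node: extract the low subvector of each vector operand, rebuild the node at the narrower type, and insert the narrowed result into an undef vector of the original type.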
This commit is contained in:
parent
d56c6475a6
commit
e855efe424
|
@ -37165,7 +37165,25 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
}
|
||||
case X86ISD::VPERMIL2: {
|
||||
SDLoc DL(Op);
|
||||
MVT ExtVT = VT.getSimpleVT();
|
||||
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
|
||||
ExtSizeInBits / ExtVT.getScalarSizeInBits());
|
||||
SDValue Ext0 =
|
||||
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue Ext1 =
|
||||
extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue Ext2 =
|
||||
extractSubVector(Op.getOperand(2), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue ExtOp =
|
||||
TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Ext2, Op.getOperand(3));
|
||||
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1104,11 +1104,8 @@ entry:
|
|||
define <4 x i32> @var_shuffle_v4i32_from_v8i32(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind {
|
||||
; XOP-LABEL: var_shuffle_v4i32_from_v8i32:
|
||||
; XOP: # %bb.0: # %entry
|
||||
; XOP-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
|
||||
; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpermil2ps $0, %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; XOP-NEXT: vpermil2ps $0, %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; XOP-NEXT: vzeroupper
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue