forked from OSchip/llvm-project
[X86] XFormVExtractWithShuffleIntoLoad - handle shuffle mask scaling
If the target shuffle mask is from a wider type, attempt to scale the mask so that the extraction can peek through it. Fixes the regression mentioned in rL368307. llvm-svn: 368308
This commit is contained in:
parent
67c246bbe6
commit
eb7a553db8
|
@ -34757,8 +34757,9 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
|
|||
/// folded into a single element load.
|
||||
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
|
||||
/// shuffles have been custom lowered so we need to handle those here.
|
||||
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
static SDValue
|
||||
XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
if (DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
|
@ -34770,13 +34771,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
|
||||
EVT OriginalVT = InVec.getValueType();
|
||||
unsigned NumOriginalElts = OriginalVT.getVectorNumElements();
|
||||
|
||||
// Peek through bitcasts, don't duplicate a load with other uses.
|
||||
InVec = peekThroughOneUseBitcasts(InVec);
|
||||
|
||||
EVT CurrentVT = InVec.getValueType();
|
||||
if (!CurrentVT.isVector() ||
|
||||
CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
|
||||
if (!CurrentVT.isVector())
|
||||
return SDValue();
|
||||
|
||||
unsigned NumCurrentElts = CurrentVT.getVectorNumElements();
|
||||
if ((NumOriginalElts % NumCurrentElts) != 0)
|
||||
return SDValue();
|
||||
|
||||
if (!isTargetShuffle(InVec.getOpcode()))
|
||||
|
@ -34793,10 +34798,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||
ShuffleOps, ShuffleMask, UnaryShuffle))
|
||||
return SDValue();
|
||||
|
||||
unsigned Scale = NumOriginalElts / NumCurrentElts;
|
||||
if (Scale > 1) {
|
||||
SmallVector<int, 16> ScaledMask;
|
||||
scaleShuffleMask<int>(Scale, ShuffleMask, ScaledMask);
|
||||
ShuffleMask = std::move(ScaledMask);
|
||||
}
|
||||
assert(ShuffleMask.size() == NumOriginalElts && "Shuffle mask size mismatch");
|
||||
|
||||
// Select the input vector, guarding against out of range extract vector.
|
||||
unsigned NumElems = CurrentVT.getVectorNumElements();
|
||||
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
|
||||
int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
|
||||
int Idx = (Elt > (int)NumOriginalElts) ? SM_SentinelUndef : ShuffleMask[Elt];
|
||||
|
||||
if (Idx == SM_SentinelZero)
|
||||
return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
|
||||
|
@ -34809,8 +34821,9 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||
if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
|
||||
return SDValue();
|
||||
|
||||
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
|
||||
SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1];
|
||||
assert(0 <= Idx && Idx < (int)(2 * NumOriginalElts) &&
|
||||
"Shuffle index out of range");
|
||||
SDValue LdNode = (Idx < (int)NumOriginalElts) ? ShuffleOps[0] : ShuffleOps[1];
|
||||
|
||||
// If inputs to shuffle are the same for both ops, then allow 2 uses
|
||||
unsigned AllowedUses =
|
||||
|
@ -34830,7 +34843,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||
|
||||
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
|
||||
|
||||
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
|
||||
if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
|
||||
return SDValue();
|
||||
|
||||
// If there's a bitcast before the shuffle, check if the load type and
|
||||
|
@ -34848,10 +34861,11 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||
SDLoc dl(N);
|
||||
|
||||
// Create shuffle node, taking into account the case that it's a unary shuffle
|
||||
SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1];
|
||||
Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle,
|
||||
ShuffleMask);
|
||||
Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
|
||||
SDValue Shuffle = UnaryShuffle ? DAG.getUNDEF(OriginalVT)
|
||||
: DAG.getBitcast(OriginalVT, ShuffleOps[1]);
|
||||
Shuffle = DAG.getVectorShuffle(OriginalVT, dl,
|
||||
DAG.getBitcast(OriginalVT, ShuffleOps[0]),
|
||||
Shuffle, ShuffleMask);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
|
||||
EltNo);
|
||||
}
|
||||
|
|
|
@ -285,13 +285,12 @@ define float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
|
|||
define float @extract_lane_insertps_6123(<4 x float> %a0, <4 x float> *%p1) {
|
||||
; SSE-LABEL: extract_lane_insertps_6123:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps (%rdi), %xmm0
|
||||
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: extract_lane_insertps_6123:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%a1 = load <4 x float>, <4 x float> *%p1
|
||||
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)
|
||||
|
|
Loading…
Reference in New Issue