forked from OSchip/llvm-project
[X86][AVX] Move lowerShuffleWithVPMOV inside explicit shuffle lowering cases
Perform lowerShuffleWithVPMOV as part of the v16i8/v8i16 shuffle lowering stages, as these are the only types currently supported. We need to expand support for lowering shuffles as truncations to fix the remaining regressions in D66004.
This commit is contained in:
parent
cd2139a527
commit
1d2ede87ea
|
@ -11329,8 +11329,7 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
MVT VT, SDValue V1, SDValue V2,
|
||||
SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
if (VT != MVT::v16i8 && VT != MVT::v8i16)
|
||||
return SDValue();
|
||||
assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");
|
||||
|
||||
if (Mask.size() != VT.getVectorNumElements())
|
||||
return SDValue();
|
||||
|
@ -14732,6 +14731,11 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
Zeroable, Subtarget, DAG))
|
||||
return ZExt;
|
||||
|
||||
// Try to lower using a truncation.
|
||||
if (SDValue V =
|
||||
lowerShuffleWithVPMOV(DL, Mask, MVT::v8i16, V1, V2, DAG, Subtarget))
|
||||
return V;
|
||||
|
||||
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
|
||||
|
||||
if (NumV2Inputs == 0) {
|
||||
|
@ -14916,6 +14920,11 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
Zeroable, Subtarget, DAG))
|
||||
return ZExt;
|
||||
|
||||
// Try to lower using a truncation.
|
||||
if (SDValue V =
|
||||
lowerShuffleWithVPMOV(DL, Mask, MVT::v16i8, V1, V2, DAG, Subtarget))
|
||||
return V;
|
||||
|
||||
// See if we can use SSE4A Extraction / Insertion.
|
||||
if (Subtarget.hasSSE4A())
|
||||
if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
|
||||
|
@ -18003,9 +18012,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
|
|||
std::swap(V1, V2);
|
||||
}
|
||||
|
||||
if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
|
||||
return V;
|
||||
|
||||
// For each vector width, delegate to a specialized lowering routine.
|
||||
if (VT.is128BitVector())
|
||||
return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
|
||||
|
|
Loading…
Reference in New Issue