[X86] Move computeZeroableShuffleElements before getTargetShuffleAndZeroables. NFCI.

Prep work toward merging some of the functionality.
This commit is contained in:
Simon Pilgrim 2019-11-02 13:38:35 +00:00
parent c4b757be02
commit 254b8461ac
1 changed files with 87 additions and 87 deletions

View File

@ -6712,6 +6712,93 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
return true;
}
/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
static void computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2,
APInt &KnownUndef, APInt &KnownZero) {
int Size = Mask.size();
KnownUndef = KnownZero = APInt::getNullValue(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
int VectorSizeInBits = V1.getValueSizeInBits();
int ScalarSizeInBits = VectorSizeInBits / Size;
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
for (int i = 0; i < Size; ++i) {
int M = Mask[i];
// Handle the easy cases.
if (M < 0) {
KnownUndef.setBit(i);
continue;
}
if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
KnownZero.setBit(i);
continue;
}
// Determine shuffle input and normalize the mask.
SDValue V = M < Size ? V1 : V2;
M %= Size;
// Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
if (V.getOpcode() != ISD::BUILD_VECTOR)
continue;
// If the BUILD_VECTOR has fewer elements then the bitcasted portion of
// the (larger) source element must be UNDEF/ZERO.
if ((Size % V.getNumOperands()) == 0) {
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef())
KnownUndef.setBit(i);
if (X86::isZeroNode(Op))
KnownZero.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
KnownZero.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
KnownZero.setBit(i);
}
continue;
}
// If the BUILD_VECTOR has more elements then all the (smaller) source
// elements must be UNDEF or ZERO.
if ((V.getNumOperands() % Size) == 0) {
int Scale = V->getNumOperands() / Size;
bool AllUndef = true;
bool AllZero = true;
for (int j = 0; j < Scale; ++j) {
SDValue Op = V.getOperand((M * Scale) + j);
AllUndef &= Op.isUndef();
AllZero &= X86::isZeroNode(Op);
}
if (AllUndef)
KnownUndef.setBit(i);
if (AllZero)
KnownZero.setBit(i);
continue;
}
}
}
/// Decode a target shuffle mask and inputs and see if any values are
/// known to be undef or zero from their inputs.
/// Returns true if the target shuffle mask was decoded.
@ -10418,93 +10505,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
static void computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2,
APInt &KnownUndef, APInt &KnownZero) {
int Size = Mask.size();
KnownUndef = KnownZero = APInt::getNullValue(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
int VectorSizeInBits = V1.getValueSizeInBits();
int ScalarSizeInBits = VectorSizeInBits / Size;
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
for (int i = 0; i < Size; ++i) {
int M = Mask[i];
// Handle the easy cases.
if (M < 0) {
KnownUndef.setBit(i);
continue;
}
if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
KnownZero.setBit(i);
continue;
}
// Determine shuffle input and normalize the mask.
SDValue V = M < Size ? V1 : V2;
M %= Size;
// Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
if (V.getOpcode() != ISD::BUILD_VECTOR)
continue;
// If the BUILD_VECTOR has fewer elements then the bitcasted portion of
// the (larger) source element must be UNDEF/ZERO.
if ((Size % V.getNumOperands()) == 0) {
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef())
KnownUndef.setBit(i);
if (X86::isZeroNode(Op))
KnownZero.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
KnownZero.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
KnownZero.setBit(i);
}
continue;
}
// If the BUILD_VECTOR has more elements then all the (smaller) source
// elements must be UNDEF or ZERO.
if ((V.getNumOperands() % Size) == 0) {
int Scale = V->getNumOperands() / Size;
bool AllUndef = true;
bool AllZero = true;
for (int j = 0; j < Scale; ++j) {
SDValue Op = V.getOperand((M * Scale) + j);
AllUndef &= Op.isUndef();
AllZero &= X86::isZeroNode(Op);
}
if (AllUndef)
KnownUndef.setBit(i);
if (AllZero)
KnownZero.setBit(i);
continue;
}
}
}
// The Shuffle result is as follow:
// 0*a[0]0*a[1]...0*a[n] , n >=0 where a[] elements in a ascending order.
// Each Zeroable's element correspond to a particular Mask's element.