[X86][SSE] Convert computeZeroableShuffleElements to emit KnownUndef and KnownZero

This commit is contained in:
Simon Pilgrim 2019-10-31 11:21:39 +00:00
parent 1eb04d289a
commit a780b94cd1
1 changed file with 35 additions and 23 deletions

View File

@@ -6732,7 +6732,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
V1 = peekThroughBitcasts(V1); V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2); V2 = peekThroughBitcasts(V2);
assert((VT.getSizeInBits() % Mask.size()) == 0 && assert((VT.getSizeInBits() % Size) == 0 &&
"Illegal split of shuffle value type"); "Illegal split of shuffle value type");
unsigned EltSizeInBits = VT.getSizeInBits() / Size; unsigned EltSizeInBits = VT.getSizeInBits() / Size;
@@ -10423,9 +10423,12 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining /// as many lanes with this technique as possible to simplify the remaining
/// shuffle. /// shuffle.
static APInt computeZeroableShuffleElements(ArrayRef<int> Mask, static void computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2) { SDValue V1, SDValue V2,
APInt Zeroable(Mask.size(), 0); APInt &KnownUndef, APInt &KnownZero) {
int Size = Mask.size();
KnownUndef = KnownZero = APInt::getNullValue(Size);
V1 = peekThroughBitcasts(V1); V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2); V2 = peekThroughBitcasts(V2);
@@ -10433,14 +10436,18 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
int VectorSizeInBits = V1.getValueSizeInBits(); int VectorSizeInBits = V1.getValueSizeInBits();
int ScalarSizeInBits = VectorSizeInBits / Mask.size(); int ScalarSizeInBits = VectorSizeInBits / Size;
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size"); assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
for (int i = 0, Size = Mask.size(); i < Size; ++i) { for (int i = 0; i < Size; ++i) {
int M = Mask[i]; int M = Mask[i];
// Handle the easy cases. // Handle the easy cases.
if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { if (M < 0) {
Zeroable.setBit(i); KnownUndef.setBit(i);
continue;
}
if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
KnownZero.setBit(i);
continue; continue;
} }
@@ -10457,20 +10464,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
if ((Size % V.getNumOperands()) == 0) { if ((Size % V.getNumOperands()) == 0) {
int Scale = Size / V->getNumOperands(); int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale); SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef() || X86::isZeroNode(Op)) if (Op.isUndef())
Zeroable.setBit(i); KnownUndef.setBit(i);
if (X86::isZeroNode(Op))
KnownZero.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue(); APInt Val = Cst->getAPIntValue();
Val.lshrInPlace((M % Scale) * ScalarSizeInBits); Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0) if (Val == 0)
Zeroable.setBit(i); KnownZero.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) { } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt(); APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val.lshrInPlace((M % Scale) * ScalarSizeInBits); Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0) if (Val == 0)
Zeroable.setBit(i); KnownZero.setBit(i);
} }
continue; continue;
} }
@@ -10479,18 +10486,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
// elements must be UNDEF or ZERO. // elements must be UNDEF or ZERO.
if ((V.getNumOperands() % Size) == 0) { if ((V.getNumOperands() % Size) == 0) {
int Scale = V->getNumOperands() / Size; int Scale = V->getNumOperands() / Size;
bool AllZeroable = true; bool AllUndef = true;
bool AllZero = true;
for (int j = 0; j < Scale; ++j) { for (int j = 0; j < Scale; ++j) {
SDValue Op = V.getOperand((M * Scale) + j); SDValue Op = V.getOperand((M * Scale) + j);
AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op)); AllUndef &= Op.isUndef();
AllZero &= X86::isZeroNode(Op);
} }
if (AllZeroable) if (AllUndef)
Zeroable.setBit(i); KnownUndef.setBit(i);
if (AllZero)
KnownZero.setBit(i);
continue; continue;
} }
} }
return Zeroable;
} }
// The Shuffle result is as follows: // The Shuffle result is as follows:
@@ -17077,7 +17086,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// We actually see shuffles that are entirely re-arrangements of a set of // We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into // zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros. // simple ones. Directly lower these as a buildvector of zeros.
APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2); APInt KnownUndef, KnownZero;
computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
APInt Zeroable = KnownUndef | KnownZero;
if (Zeroable.isAllOnesValue()) if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL); return getZeroVector(VT, Subtarget, DAG, DL);