[X86][SSE] Refactor getTargetConstantBitsFromNode to avoid large APInts (PR32037)

Much of PR32037's compile-time regression is due to getTargetConstantBitsFromNode always creating large (>64-bit) APInts while bitcasting the source data to the destination bitwidth.

This commit skips the bitcast stage when the source data already has the correct bitwidth.

llvm-svn: 305284
This commit is contained in:
Simon Pilgrim 2017-06-13 10:13:48 +00:00
parent 8cc09eb8dd
commit 2b3b717768
1 changed file with 66 additions and 36 deletions

View File

@ -5314,20 +5314,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
unsigned NumElts = SizeInBits / EltSizeInBits; unsigned NumElts = SizeInBits / EltSizeInBits;
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); // Bitcast a source array of element bits to the target size.
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
unsigned NumSrcElts = UndefSrcElts.getBitWidth();
unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
"Constant bit sizes don't match");
// Don't split if we don't allow undef bits.
bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
if (UndefSrcElts.getBoolValue() && !AllowUndefs)
return false;
// If we're already the right size, don't bother bitcasting.
if (NumSrcElts == NumElts) {
UndefElts = UndefSrcElts;
EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
return true;
}
// Extract all the undef/constant element data and pack into single bitsets. // Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(SizeInBits, 0); APInt UndefBits(SizeInBits, 0);
APInt MaskBits(SizeInBits, 0); APInt MaskBits(SizeInBits, 0);
// Split the undef/constant single bitset data into the target elements. for (unsigned i = 0; i != NumSrcElts; ++i) {
auto SplitBitData = [&]() { unsigned BitOffset = i * SrcEltSizeInBits;
// Don't split if we don't allow undef bits. if (UndefSrcElts[i])
bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
if (UndefBits.getBoolValue() && !AllowUndefs) MaskBits.insertBits(SrcEltBits[i], BitOffset);
return false; }
// Split the undef/constant single bitset data into the target elements.
UndefElts = APInt(NumElts, 0); UndefElts = APInt(NumElts, 0);
EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
@ -5356,20 +5373,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
// Collect constant bits and insert into mask/undef bit masks. // Collect constant bits and insert into mask/undef bit masks.
auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs, auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
unsigned BitOffset) { unsigned UndefBitIndex) {
if (!Cst) if (!Cst)
return false; return false;
if (isa<UndefValue>(Cst)) { if (isa<UndefValue>(Cst)) {
unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); Undefs.setBit(UndefBitIndex);
Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
return true; return true;
} }
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
Mask.insertBits(CInt->getValue(), BitOffset); Mask = CInt->getValue();
return true; return true;
} }
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset); Mask = CFP->getValueAPF().bitcastToAPInt();
return true; return true;
} }
return false; return false;
@ -5377,18 +5393,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
// Extract constant bits from build vector. // Extract constant bits from build vector.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
const SDValue &Src = Op.getOperand(i); const SDValue &Src = Op.getOperand(i);
unsigned BitOffset = i * SrcEltSizeInBits;
if (Src.isUndef()) { if (Src.isUndef()) {
UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); UndefSrcElts.setBit(i);
continue; continue;
} }
auto *Cst = cast<ConstantSDNode>(Src); auto *Cst = cast<ConstantSDNode>(Src);
APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
MaskBits.insertBits(Bits, BitOffset);
} }
return SplitBitData(); return CastBitData(UndefSrcElts, SrcEltBits);
} }
// Extract constant bits from constant pool vector. // Extract constant bits from constant pool vector.
@ -5397,27 +5416,33 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits())) if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
return false; return false;
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) unsigned NumSrcElts = CstTy->getVectorNumElements();
if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits,
i * CstEltSizeInBits)) APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0; i != NumSrcElts; ++i)
if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
UndefSrcElts, i))
return false; return false;
return SplitBitData(); return CastBitData(UndefSrcElts, SrcEltBits);
} }
// Extract constant bits from a broadcasted constant pool scalar. // Extract constant bits from a broadcasted constant pool scalar.
if (Op.getOpcode() == X86ISD::VBROADCAST && if (Op.getOpcode() == X86ISD::VBROADCAST &&
EltSizeInBits <= SrcEltSizeInBits) { EltSizeInBits <= VT.getScalarSizeInBits()) {
if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) { if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
APInt Bits(SizeInBits, 0); unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
APInt Undefs(SizeInBits, 0); unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) {
for (unsigned i = 0; i != NumSrcElts; ++i) { APInt UndefSrcElts(NumSrcElts, 0);
MaskBits |= Bits.shl(i * SrcEltSizeInBits); SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
UndefBits |= Undefs.shl(i * SrcEltSizeInBits); if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
} if (UndefSrcElts[0])
return SplitBitData(); UndefSrcElts.setBits(0, NumSrcElts);
SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
return CastBitData(UndefSrcElts, SrcEltBits);
} }
} }
} }
@ -5426,10 +5451,15 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (Op.getOpcode() == X86ISD::VZEXT_MOVL && if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) { isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits;
auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0)); auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
MaskBits = MaskBits.zext(SizeInBits); SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
return SplitBitData(); return CastBitData(UndefSrcElts, SrcEltBits);
} }
return false; return false;