[X86][SSE] Refactor getTargetConstantBitsFromNode to avoid large APInts (PR32037)
Much of PR32037's compile time regression is due to getTargetConstantBitsFromNode always creating large (>64-bit) APInts during the bitcast from the source data to the destination bitwidth. This commit avoids the bitcast stage if the data is already at the correct bitwidth.

llvm-svn: 305284
parent 8cc09eb8dd
commit 2b3b717768
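The idea in a minimal standalone sketch (hypothetical helper name, undef-element handling omitted; not the actual LLVM code below): when the source elements already have the requested element width, the constant bits can be forwarded directly instead of being packed into a single SizeInBits-wide APInt and re-split, which is what forced >64-bit APInt allocations for 128/256/512-bit vectors.

// Sketch of the fast path this commit adds: skip the pack-into-one-wide-APInt
// round trip whenever the source elements are already the target width.
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Hypothetical helper, loosely modelled on the CastBitData lambda in the diff.
static bool castBitDataSketch(unsigned EltSizeInBits,
                              ArrayRef<APInt> SrcEltBits,
                              SmallVectorImpl<APInt> &EltBits) {
  unsigned NumSrcElts = SrcEltBits.size();
  unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
  unsigned SizeInBits = NumSrcElts * SrcEltSizeInBits;
  unsigned NumElts = SizeInBits / EltSizeInBits;

  // Fast path: element widths already match - no wide APInt is ever created.
  if (NumSrcElts == NumElts) {
    EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
    return true;
  }

  // Slow path: pack everything into one SizeInBits-wide APInt, then re-split.
  // For vectors wider than 64 bits this allocates multi-word APInts.
  APInt MaskBits(SizeInBits, 0);
  for (unsigned i = 0; i != NumSrcElts; ++i)
    MaskBits.insertBits(SrcEltBits[i], i * SrcEltSizeInBits);

  EltBits.clear();
  for (unsigned i = 0; i != NumElts; ++i)
    EltBits.push_back(MaskBits.extractBits(EltSizeInBits, i * EltSizeInBits));
  return true;
}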
@@ -5314,20 +5314,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
   assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
   unsigned NumElts = SizeInBits / EltSizeInBits;
 
-  unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
-  unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
-
-  // Extract all the undef/constant element data and pack into single bitsets.
-  APInt UndefBits(SizeInBits, 0);
-  APInt MaskBits(SizeInBits, 0);
-
-  // Split the undef/constant single bitset data into the target elements.
-  auto SplitBitData = [&]() {
+  // Bitcast a source array of element bits to the target size.
+  auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
+    unsigned NumSrcElts = UndefSrcElts.getBitWidth();
+    unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
+    assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
+           "Constant bit sizes don't match");
+
     // Don't split if we don't allow undef bits.
     bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
-    if (UndefBits.getBoolValue() && !AllowUndefs)
+    if (UndefSrcElts.getBoolValue() && !AllowUndefs)
       return false;
 
+    // If we're already the right size, don't bother bitcasting.
+    if (NumSrcElts == NumElts) {
+      UndefElts = UndefSrcElts;
+      EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
+      return true;
+    }
+
+    // Extract all the undef/constant element data and pack into single bitsets.
+    APInt UndefBits(SizeInBits, 0);
+    APInt MaskBits(SizeInBits, 0);
+
+    for (unsigned i = 0; i != NumSrcElts; ++i) {
+      unsigned BitOffset = i * SrcEltSizeInBits;
+      if (UndefSrcElts[i])
+        UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
+      MaskBits.insertBits(SrcEltBits[i], BitOffset);
+    }
+
+    // Split the undef/constant single bitset data into the target elements.
     UndefElts = APInt(NumElts, 0);
     EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
 
@@ -5356,20 +5373,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
 
   // Collect constant bits and insert into mask/undef bit masks.
   auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
-                                unsigned BitOffset) {
+                                unsigned UndefBitIndex) {
     if (!Cst)
      return false;
    if (isa<UndefValue>(Cst)) {
-      unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
-      Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
+      Undefs.setBit(UndefBitIndex);
      return true;
    }
    if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
-      Mask.insertBits(CInt->getValue(), BitOffset);
+      Mask = CInt->getValue();
      return true;
    }
    if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
-      Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset);
+      Mask = CFP->getValueAPF().bitcastToAPInt();
      return true;
    }
    return false;
@@ -5377,18 +5393,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
 
   // Extract constant bits from build vector.
   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+    unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+    APInt UndefSrcElts(NumSrcElts, 0);
+    SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
       const SDValue &Src = Op.getOperand(i);
-      unsigned BitOffset = i * SrcEltSizeInBits;
       if (Src.isUndef()) {
-        UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
+        UndefSrcElts.setBit(i);
         continue;
       }
       auto *Cst = cast<ConstantSDNode>(Src);
-      APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
-      MaskBits.insertBits(Bits, BitOffset);
+      SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
     }
-    return SplitBitData();
+    return CastBitData(UndefSrcElts, SrcEltBits);
   }
 
   // Extract constant bits from constant pool vector.
@@ -5397,27 +5416,33 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
     if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
       return false;
 
-    unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
-    for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i)
-      if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits,
-                               i * CstEltSizeInBits))
+    unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
+    unsigned NumSrcElts = CstTy->getVectorNumElements();
+
+    APInt UndefSrcElts(NumSrcElts, 0);
+    SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
+    for (unsigned i = 0; i != NumSrcElts; ++i)
+      if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
+                               UndefSrcElts, i))
         return false;
 
-    return SplitBitData();
+    return CastBitData(UndefSrcElts, SrcEltBits);
   }
 
   // Extract constant bits from a broadcasted constant pool scalar.
   if (Op.getOpcode() == X86ISD::VBROADCAST &&
-      EltSizeInBits <= SrcEltSizeInBits) {
+      EltSizeInBits <= VT.getScalarSizeInBits()) {
     if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
-      APInt Bits(SizeInBits, 0);
-      APInt Undefs(SizeInBits, 0);
-      if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) {
-        for (unsigned i = 0; i != NumSrcElts; ++i) {
-          MaskBits |= Bits.shl(i * SrcEltSizeInBits);
-          UndefBits |= Undefs.shl(i * SrcEltSizeInBits);
-        }
-        return SplitBitData();
+      unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
+      unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+      APInt UndefSrcElts(NumSrcElts, 0);
+      SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
+      if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
+        if (UndefSrcElts[0])
+          UndefSrcElts.setBits(0, NumSrcElts);
+        SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
+        return CastBitData(UndefSrcElts, SrcEltBits);
       }
     }
   }
@@ -5426,10 +5451,15 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
   if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
       Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
       isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
+    unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+    APInt UndefSrcElts(NumSrcElts, 0);
+    SmallVector<APInt, 64> SrcEltBits;
     auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
-    MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
-    MaskBits = MaskBits.zext(SizeInBits);
-    return SplitBitData();
+    SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
+    SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
+    return CastBitData(UndefSrcElts, SrcEltBits);
   }
 
   return false;