[x86] make mayFold* helpers visible to more files; NFC
The first function is needed for D112464, but we might as well keep these together in case the others can be used someday.
commit 837518d6a0
parent 8f786b4618
llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -5039,8 +5039,8 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
 //                           Other Lowering Hooks
 //===----------------------------------------------------------------------===//
 
-static bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
-                        bool AssumeSingleUse = false) {
+bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+                      bool AssumeSingleUse) {
   if (!AssumeSingleUse && !Op.hasOneUse())
     return false;
   if (!ISD::isNormalLoad(Op.getNode()))
@@ -5058,11 +5058,11 @@ static bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
   return true;
 }
 
-static bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
-                                            const X86Subtarget &Subtarget,
-                                            bool AssumeSingleUse = false) {
+bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+                                          const X86Subtarget &Subtarget,
+                                          bool AssumeSingleUse) {
   assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
-  if (!mayFoldLoad(Op, Subtarget, AssumeSingleUse))
+  if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
     return false;
 
   // We can not replace a wide volatile load with a broadcast-from-memory,
@@ -5072,11 +5072,11 @@ static bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
          Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
 }
 
-static bool MayFoldIntoStore(SDValue Op) {
+bool X86::mayFoldIntoStore(SDValue Op) {
   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
 }
 
-static bool MayFoldIntoZeroExtend(SDValue Op) {
+bool X86::mayFoldIntoZeroExtend(SDValue Op) {
   if (Op.hasOneUse()) {
     unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
     return (ISD::ZERO_EXTEND == Opcode);
@@ -9013,7 +9013,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
             Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL);
         } else {
           if (!Subtarget.hasAVX2() &&
-              !mayFoldLoadIntoBroadcastFromMem(
+              !X86::mayFoldLoadIntoBroadcastFromMem(
                   RepeatLoad, RepeatVT.getScalarType().getSimpleVT(),
                   Subtarget,
                   /*AssumeSingleUse=*/true))
@@ -12746,7 +12746,7 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
                                                MutableArrayRef<int> InputMask) {
     unsigned EltSizeInBits = Input.getScalarValueSizeInBits();
     if (!Subtarget.hasAVX2() && (!Subtarget.hasAVX() || EltSizeInBits < 32 ||
-                                 !mayFoldLoad(Input, Subtarget)))
+                                 !X86::mayFoldLoad(Input, Subtarget)))
       return;
     if (isNoopShuffleMask(InputMask))
       return;
@@ -16431,7 +16431,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
   bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
   bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
   if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
-      mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) {
+      X86::mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) {
     auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
     if (!Ld->isNonTemporal()) {
       MVT MemVT = VT.getHalfNumVectorElementsVT();
@@ -19068,8 +19068,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
   if (VT.getSizeInBits() == 8) {
     // If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
     // we're going to zero extend the register or fold the store.
-    if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
-        !MayFoldIntoStore(Op))
+    if (llvm::isNullConstant(Idx) && !X86::mayFoldIntoZeroExtend(Op) &&
+        !X86::mayFoldIntoStore(Op))
       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                      DAG.getBitcast(MVT::v4i32, Vec), Idx));
@@ -19225,8 +19225,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
   if (VT == MVT::i16) {
     // If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
     // we're going to zero extend the register or fold the store (SSE41 only).
-    if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
-        !(Subtarget.hasSSE41() && MayFoldIntoStore(Op))) {
+    if (IdxVal == 0 && !X86::mayFoldIntoZeroExtend(Op) &&
+        !(Subtarget.hasSSE41() && X86::mayFoldIntoStore(Op))) {
       if (Subtarget.hasFP16())
         return Op;
 
@@ -19432,7 +19432,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   if (!VT.is128BitVector() && IdxVal >= NumEltsIn128 &&
       ((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
        (Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
-        mayFoldLoad(N1, Subtarget)))) {
+        X86::mayFoldLoad(N1, Subtarget)))) {
     SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);
     SmallVector<int, 8> BlendMask;
     for (unsigned i = 0; i != NumElts; ++i)
@@ -19505,7 +19505,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
     // combine either bitwise AND or insert of float 0.0 to set these bits.
 
     bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
-    if (IdxVal == 0 && (!MinSize || !mayFoldLoad(N1, Subtarget))) {
+    if (IdxVal == 0 && (!MinSize || !X86::mayFoldLoad(N1, Subtarget))) {
       // If this is an insertion of 32-bits into the low 32-bits of
       // a vector, we prefer to generate a blend with immediate rather
       // than an insertps. Blends are simpler operations in hardware and so
@@ -24645,8 +24645,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   // being inserted between two CMOV's. (in i16 case too TBN)
   // https://bugs.llvm.org/show_bug.cgi?id=40974
   if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
-      (Op.getValueType() == MVT::i16 && !mayFoldLoad(Op1, Subtarget) &&
-       !mayFoldLoad(Op2, Subtarget))) {
+      (Op.getValueType() == MVT::i16 && !X86::mayFoldLoad(Op1, Subtarget) &&
+       !X86::mayFoldLoad(Op2, Subtarget))) {
     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
     Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
     SDValue Ops[] = { Op2, Op1, CC, Cond };
@@ -36997,7 +36997,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     if (isUndefOrEqual(Mask, 0)) {
       if (V1.getValueType() == MaskVT &&
           V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-          mayFoldLoad(V1.getOperand(0), Subtarget)) {
+          X86::mayFoldLoad(V1.getOperand(0), Subtarget)) {
         if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
           return SDValue(); // Nothing to do!
         Res = V1.getOperand(0);
@@ -38440,8 +38440,8 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
     unsigned Imm = V.getConstantOperandVal(2);
     const X86Subtarget &Subtarget =
         static_cast<const X86Subtarget &>(DAG.getSubtarget());
-    if (!mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
-        mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
+    if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
+        X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
      return SDValue();
     Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
     return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
@@ -51713,8 +51713,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     // concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
     if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
         (Subtarget.hasAVX2() ||
-         mayFoldLoadIntoBroadcastFromMem(Op0.getOperand(0), VT.getScalarType(),
-                                         Subtarget)))
+         X86::mayFoldLoadIntoBroadcastFromMem(Op0.getOperand(0),
+                                              VT.getScalarType(), Subtarget)))
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
                                      Op0.getOperand(0),
@@ -51723,7 +51723,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
     if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         (Subtarget.hasAVX2() ||
-         (EltSizeInBits >= 32 && mayFoldLoad(Op0.getOperand(0), Subtarget))) &&
+         (EltSizeInBits >= 32 &&
+          X86::mayFoldLoad(Op0.getOperand(0), Subtarget))) &&
         Op0.getOperand(0).getValueType() == VT.getScalarType())
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
 
@@ -53056,7 +53057,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
   case ISD::SRL: {
     SDValue N0 = Op.getOperand(0);
     // Look out for (store (shl (load), x)).
-    if (mayFoldLoad(N0, Subtarget) && IsFoldableRMW(N0, Op))
+    if (X86::mayFoldLoad(N0, Subtarget) && IsFoldableRMW(N0, Op))
       return false;
     break;
   }
@@ -53071,11 +53072,11 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
     SDValue N0 = Op.getOperand(0);
     SDValue N1 = Op.getOperand(1);
     // Avoid disabling potential load folding opportunities.
-    if (mayFoldLoad(N1, Subtarget) &&
+    if (X86::mayFoldLoad(N1, Subtarget) &&
         (!Commute || !isa<ConstantSDNode>(N0) ||
          (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
       return false;
-    if (mayFoldLoad(N0, Subtarget) &&
+    if (X86::mayFoldLoad(N0, Subtarget) &&
         ((Commute && !isa<ConstantSDNode>(N1)) ||
          (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
       return false;
llvm/lib/Target/X86/X86ISelLowering.h

@@ -911,6 +911,25 @@ namespace llvm {
     /// as zero if AllowPartialUndefs is set, else we fail and return false.
     bool isConstantSplat(SDValue Op, APInt &SplatVal,
                          bool AllowPartialUndefs = true);
+
+    /// Check if Op is a load operation that could be folded into some other x86
+    /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
+    bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+                     bool AssumeSingleUse = false);
+
+    /// Check if Op is a load operation that could be folded into a vector splat
+    /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
+    bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+                                         const X86Subtarget &Subtarget,
+                                         bool AssumeSingleUse = false);
+
+    /// Check if Op is a value that could be used to fold a store into some
+    /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
+    bool mayFoldIntoStore(SDValue Op);
+
+    /// Check if Op is an operation that could be folded into a zero extend x86
+    /// instruction.
+    bool mayFoldIntoZeroExtend(SDValue Op);
   } // end namespace X86
 
   //===--------------------------------------------------------------------===//
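For context, a minimal sketch of how another file in the X86 backend could use the newly exported helpers once it includes X86ISelLowering.h. The wrapper function below (preferMemoryForm) is a hypothetical illustration, not part of this commit; only the X86::mayFold* declarations shown in the header hunk above are real.

// Hypothetical caller (illustration only): any X86 backend source file that
// includes X86ISelLowering.h can now reach these helpers through the
// llvm::X86 namespace instead of duplicating the old static versions.
#include "X86ISelLowering.h"
#include "X86Subtarget.h"

using namespace llvm;

// Sketch: prefer a memory-operand form only when the operand is a load that
// can actually be folded, or when the result feeds a foldable store.
static bool preferMemoryForm(SDValue Op, const X86Subtarget &Subtarget) {
  // AssumeSingleUse defaults to false; pass true only when the caller has
  // already proven the load has no other users.
  return X86::mayFoldLoad(Op, Subtarget, /*AssumeSingleUse=*/false) ||
         X86::mayFoldIntoStore(Op);
}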