forked from OSchip/llvm-project
DAG: Add function context to isFMAFasterThanFMulAndFAdd
AMDGPU needs to know the FP mode for the function to answer this correctly when this is removed from the subtarget. AArch64 had to make this more complicated by using this from an IR hook, so add an IR typed overload.
This commit is contained in:
parent
714445e406
commit
b696b9dba7
|
@ -2528,7 +2528,13 @@ public:
|
|||
/// not legal, but should return true if those types will eventually legalize
|
||||
/// to types that support FMAs. After legalization, it will only be called on
|
||||
/// types that support FMAs (via Legal or Custom actions)
|
||||
virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
|
||||
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// IR version
|
||||
virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -3763,7 +3769,7 @@ public:
|
|||
/// Should SelectionDAG lower an atomic store of the given kind as a normal
|
||||
/// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
|
||||
/// eventually migrate all targets to the using StoreSDNodes, but porting is
|
||||
/// being done target at a time.
|
||||
/// being done target at a time.
|
||||
virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
|
||||
assert(SI.isAtomic() && "violated precondition");
|
||||
return false;
|
||||
|
|
|
@ -1417,7 +1417,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
|
|||
Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
|
||||
Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
|
||||
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
|
||||
TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
|
||||
TLI.isFMAFasterThanFMulAndFAdd(*MF,
|
||||
TLI.getValueType(*DL, CI.getType()))) {
|
||||
// TODO: Revisit this to see if we should move this part of the
|
||||
// lowering to the combiner.
|
||||
MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
|
||||
|
|
|
@ -11337,7 +11337,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
|||
|
||||
// Floating-point multiply-add without intermediate rounding.
|
||||
bool HasFMA =
|
||||
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
|
||||
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
|
||||
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
|
||||
|
||||
// No valid opcode, do not combine.
|
||||
|
@ -11554,7 +11554,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
|||
|
||||
// Floating-point multiply-add without intermediate rounding.
|
||||
bool HasFMA =
|
||||
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
|
||||
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
|
||||
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
|
||||
|
||||
// No valid opcode, do not combine.
|
||||
|
@ -11860,7 +11860,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
|
|||
// Floating-point multiply-add without intermediate rounding.
|
||||
bool HasFMA =
|
||||
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
|
||||
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
|
||||
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
|
||||
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
|
||||
|
||||
// Floating-point multiply-add with intermediate rounding. This can result
|
||||
|
|
|
@ -6169,7 +6169,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
|||
case Intrinsic::fmuladd: {
|
||||
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
|
||||
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
|
||||
TLI.isFMAFasterThanFMulAndFAdd(VT)) {
|
||||
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
|
||||
setValue(&I, DAG.getNode(ISD::FMA, sdl,
|
||||
getValue(I.getArgOperand(0)).getValueType(),
|
||||
getValue(I.getArgOperand(0)),
|
||||
|
|
|
@ -8546,11 +8546,12 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
|
|||
return true;
|
||||
|
||||
const TargetOptions &Options = getTargetMachine().Options;
|
||||
const DataLayout &DL = I->getModule()->getDataLayout();
|
||||
EVT VT = getValueType(DL, User->getOperand(0)->getType());
|
||||
const Function *F = I->getFunction();
|
||||
const DataLayout &DL = F->getParent()->getDataLayout();
|
||||
Type *Ty = User->getOperand(0)->getType();
|
||||
|
||||
return !(isFMAFasterThanFMulAndFAdd(VT) &&
|
||||
isOperationLegalOrCustom(ISD::FMA, VT) &&
|
||||
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
|
||||
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
|
||||
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
|
||||
Options.UnsafeFPMath));
|
||||
}
|
||||
|
@ -9207,7 +9208,8 @@ int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
|
|||
return -1;
|
||||
}
|
||||
|
||||
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
|
||||
const MachineFunction &MF, EVT VT) const {
|
||||
VT = VT.getScalarType();
|
||||
|
||||
if (!VT.isSimple())
|
||||
|
@ -9224,6 +9226,17 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
|
||||
Type *Ty) const {
|
||||
switch (Ty->getScalarType()->getTypeID()) {
|
||||
case Type::FloatTyID:
|
||||
case Type::DoubleTyID:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const MCPhysReg *
|
||||
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
|
||||
// LR is a callee-save register, but we must treat it as clobbered by any call
|
||||
|
|
|
@ -396,7 +396,9 @@ public:
|
|||
/// Return true if an FMA operation is faster than a pair of fmul and fadd
|
||||
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
|
||||
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
|
||||
|
||||
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
|
||||
|
||||
|
|
|
@ -3920,7 +3920,8 @@ MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
|
|||
// however does not support denormals, so we do report fma as faster if we have
|
||||
// a fast fma device and require denormals.
|
||||
//
|
||||
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const {
|
||||
VT = VT.getScalarType();
|
||||
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
|
@ -9461,7 +9462,7 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
|
|||
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
|
||||
(N0->getFlags().hasAllowContract() &&
|
||||
N1->getFlags().hasAllowContract())) &&
|
||||
isFMAFasterThanFMulAndFAdd(VT)) {
|
||||
isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
|
||||
return ISD::FMA;
|
||||
}
|
||||
|
||||
|
|
|
@ -349,7 +349,8 @@ public:
|
|||
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
|
||||
EVT VT) const override;
|
||||
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const override;
|
||||
bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
|
||||
const SDNode *N) const override;
|
||||
|
||||
|
|
|
@ -14826,7 +14826,8 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
|
|||
///
|
||||
/// For MVE, we set this to true as it helps simplify the need for some
|
||||
/// patterns (and we don't have the non-fused floating point instruction).
|
||||
bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const {
|
||||
if (!Subtarget->hasMVEFloatOps())
|
||||
return false;
|
||||
|
||||
|
|
|
@ -738,7 +738,8 @@ class VectorType;
|
|||
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDNode *> &Created) const override;
|
||||
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const override;
|
||||
|
||||
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
|
|
|
@ -1909,7 +1909,8 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
|
|||
return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
|
||||
}
|
||||
|
||||
bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
|
||||
const MachineFunction &MF, EVT VT) const {
|
||||
return isOperationLegalOrCustom(ISD::FMA, VT);
|
||||
}
|
||||
|
||||
|
|
|
@ -137,7 +137,8 @@ namespace HexagonISD {
|
|||
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
|
||||
/// method returns true (and FMAs are legal), otherwise fmuladd is
|
||||
/// expanded to mul + add.
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &,
|
||||
EVT) const override;
|
||||
|
||||
// Should we expand the build vector with shuffles?
|
||||
bool shouldExpandBuildVectorWithShuffles(EVT VT,
|
||||
|
|
|
@ -538,7 +538,10 @@ public:
|
|||
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
|
||||
bool allowUnsafeFPMath(MachineFunction &MF) const;
|
||||
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT) const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
|
||||
|
||||
|
|
|
@ -14948,7 +14948,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const {
|
||||
VT = VT.getScalarType();
|
||||
|
||||
if (!VT.isSimple())
|
||||
|
|
|
@ -907,7 +907,8 @@ namespace llvm {
|
|||
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
|
||||
/// expanded to FMAs when this method returns true, otherwise fmuladd is
|
||||
/// expanded to fmul + fadd.
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const override;
|
||||
|
||||
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
|
||||
|
||||
|
|
|
@ -643,7 +643,8 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
|
|||
return VT.changeVectorElementTypeToInteger();
|
||||
}
|
||||
|
||||
bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
|
||||
const MachineFunction &MF, EVT VT) const {
|
||||
VT = VT.getScalarType();
|
||||
|
||||
if (!VT.isSimple())
|
||||
|
|
|
@ -404,7 +404,8 @@ public:
|
|||
bool isCheapToSpeculateCtlz() const override { return true; }
|
||||
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
|
||||
EVT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const override;
|
||||
bool isFPImmLegal(const APFloat &Imm, EVT VT,
|
||||
bool ForCodeSize) const override;
|
||||
bool isLegalICmpImmediate(int64_t Imm) const override;
|
||||
|
|
|
@ -29115,8 +29115,8 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const {
|
||||
if (!Subtarget.hasAnyFMA())
|
||||
return false;
|
||||
|
||||
|
|
|
@ -1056,7 +1056,8 @@ namespace llvm {
|
|||
/// Return true if an FMA operation is faster than a pair of fmul and fadd
|
||||
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
|
||||
/// method returns true, otherwise fmuladd is expanded to fmul + fadd.
|
||||
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const override;
|
||||
|
||||
/// Return true if it's profitable to narrow
|
||||
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
|
||||
|
|
Loading…
Reference in New Issue