diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7fe8ffbaf6f2..e4adbf438a98 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2528,7 +2528,13 @@ public:
   /// not legal, but should return true if those types will eventually legalize
   /// to types that support FMAs. After legalization, it will only be called on
   /// types that support FMAs (via Legal or Custom actions)
-  virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
+  virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                          EVT) const {
+    return false;
+  }
+
+  /// IR version
+  virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
     return false;
   }
 
@@ -3763,7 +3769,7 @@ public:
   /// Should SelectionDAG lower an atomic store of the given kind as a normal
   /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
   /// eventually migrate all targets to the using StoreSDNodes, but porting is
-  /// being done target at a time. 
+  /// being done target at a time.
   virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
     assert(SI.isAtomic() && "violated precondition");
     return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 3f2826cda636..5b0ad083bfd9 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1417,7 +1417,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
     Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
+        TLI.isFMAFasterThanFMulAndFAdd(*MF,
+                                       TLI.getValueType(*DL, CI.getType()))) {
       // TODO: Revisit this to see if we should move this part of the
       // lowering to the combiner.
       MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9780b6992fbb..2db30279fbdd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11337,7 +11337,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
 
   // Floating-point multiply-add without intermediate rounding.
   bool HasFMA =
-      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
 
   // No valid opcode, do not combine.
@@ -11554,7 +11554,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
 
   // Floating-point multiply-add without intermediate rounding.
   bool HasFMA =
-      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
 
   // No valid opcode, do not combine.
@@ -11860,7 +11860,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
   // Floating-point multiply-add without intermediate rounding.
   bool HasFMA =
       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
-      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
 
   // Floating-point multiply-add with intermediate rounding.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1a42150b3e54..3f41a24fa4b9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6169,7 +6169,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI.isFMAFasterThanFMulAndFAdd(VT)) {
+        TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
       setValue(&I, DAG.getNode(ISD::FMA, sdl,
                                getValue(I.getArgOperand(0)).getValueType(),
                                getValue(I.getArgOperand(0)),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a9471a7acaf7..9e8df33218b8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8546,11 +8546,12 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
     return true;
 
   const TargetOptions &Options = getTargetMachine().Options;
-  const DataLayout &DL = I->getModule()->getDataLayout();
-  EVT VT = getValueType(DL, User->getOperand(0)->getType());
+  const Function *F = I->getFunction();
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  Type *Ty = User->getOperand(0)->getType();
 
-  return !(isFMAFasterThanFMulAndFAdd(VT) &&
-           isOperationLegalOrCustom(ISD::FMA, VT) &&
+  return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+           isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
            (Options.AllowFPOpFusion == FPOpFusion::Fast ||
             Options.UnsafeFPMath));
 }
@@ -9207,7 +9208,8 @@ int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
   return -1;
 }
 
-bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
@@ -9224,6 +9226,17 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
   return false;
 }
 
+bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+                                                       Type *Ty) const {
+  switch (Ty->getScalarType()->getTypeID()) {
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+    return true;
+  default:
+    return false;
+  }
+}
+
 const MCPhysReg *
 AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
   // LR is a callee-save register, but we must treat it as clobbered by any call
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 5a76f0c467b7..384c7b4456f0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -396,7 +396,9 @@ public:
   /// Return true if an FMA operation is faster than a pair of fmul and fadd
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 85af397228e7..1a02037fcd40 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3920,7 +3920,8 @@ MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
 // however does not support denormals, so we do report fma as faster if we have
 // a fast fma device and require denormals.
 //
-bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                  EVT VT) const {
   VT = VT.getScalarType();
 
   switch (VT.getSimpleVT().SimpleTy) {
@@ -9461,7 +9462,7 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
   if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
        (N0->getFlags().hasAllowContract() &&
         N1->getFlags().hasAllowContract())) &&
-      isFMAFasterThanFMulAndFAdd(VT)) {
+      isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
     return ISD::FMA;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c99904c27834..b2c2e40923ae 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -349,7 +349,8 @@ public:
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
   bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
                               const SDNode *N) const override;
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c9314007c0a7..a33535ecd174 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14826,7 +14826,8 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
 ///
 /// For MVE, we set this to true as it helps simplify the need for some
 /// patterns (and we don't have the non-fused floating point instruction).
-bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                   EVT VT) const {
   if (!Subtarget->hasMVEFloatOps())
     return false;
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0aee61fbe802..367a40b89681 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -738,7 +738,8 @@ class VectorType;
     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                           SmallVectorImpl<SDNode *> &Created) const override;
 
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override;
 
     SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 7345100f178f..1d7aa2c2ad25 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1909,7 +1909,8 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
 }
 
-bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   return isOperationLegalOrCustom(ISD::FMA, VT);
 }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 75f553bfec7f..ed207a7c1ebb 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -137,7 +137,8 @@ namespace HexagonISD {
     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
     /// method returns true (and FMAs are legal), otherwise fmuladd is
     /// expanded to mul + add.
-    bool isFMAFasterThanFMulAndFAdd(EVT) const override;
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &,
+                                    EVT) const override;
 
     // Should we expand the build vector with shuffles?
    bool shouldExpandBuildVectorWithShuffles(EVT VT,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ef645fc1e541..546fe49808e2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -538,7 +538,10 @@ public:
   bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
   bool allowUnsafeFPMath(MachineFunction &MF) const;
 
-  bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT) const override {
+    return true;
+  }
 
   bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a2ffe9e35409..313d6b835dbc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14948,7 +14948,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   return true;
 }
 
-bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                   EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index a01134321b9e..77b19b263466 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -907,7 +907,8 @@ namespace llvm {
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
     /// expanded to FMAs when this method returns true, otherwise fmuladd is
     /// expanded to fmul + fadd.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override;
 
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index daef108b6f0b..42c18803e283 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -643,7 +643,8 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
   return VT.changeVectorElementTypeToInteger();
 }
 
-bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 7391365bda4d..f774b8a896c8 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -404,7 +404,8 @@ public:
   bool isCheapToSpeculateCtlz() const override { return true; }
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT) const override;
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6bb2d1ec9e5a..bcb091eb52ea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29115,8 +29115,8 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
   return true;
 }
 
-bool
-X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                   EVT VT) const {
   if (!Subtarget.hasAnyFMA())
     return false;
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 6f7e90008de4..184983d30acc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1056,7 +1056,8 @@ namespace llvm {
     /// Return true if an FMA operation is faster than a pair of fmul and fadd
     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override;
 
     /// Return true if it's profitable to narrow
     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
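
For readers skimming the patch: the reason for threading a MachineFunction through this hook is that a target can now make the FMA-versus-fmul+fadd decision from per-function state rather than from global TargetOptions alone. The sketch below is illustrative only and is not part of this patch or of any in-tree target; MyTargetLowering and the "unsafe-fp-math" attribute check are assumptions chosen just to show what the new parameter makes reachable.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Function.h"

// Hypothetical out-of-tree override, for illustration. The new parameter
// exposes the containing IR function via MachineFunction::getFunction(), so
// per-function string attributes become visible inside the hook.
bool MyTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                  EVT VT) const {
  const Function &F = MF.getFunction();
  // Example of per-function state: the "unsafe-fp-math" function attribute,
  // which the front end sets per function.
  bool UnsafeFPMath =
      F.getFnAttribute("unsafe-fp-math").getValueAsString() == "true";
  EVT ScalarVT = VT.getScalarType();
  return UnsafeFPMath && (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
}

The separate Function/Type overload ("IR version") covers callers that run on IR before any MachineFunction exists; the AArch64 isProfitableToHoist hunk above switches to it for exactly that reason.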