diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 4e8b6ec202cd..254cabfc85ba 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -89,12 +89,10 @@ public: RecurrenceDescriptor() = default; RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, - FastMathFlags FMF, MinMaxRecurrenceKind MK, - Instruction *UAI, Type *RT, bool Signed, - SmallPtrSetImpl &CI) - : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), - MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT), - IsSigned(Signed) { + MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT, + bool Signed, SmallPtrSetImpl &CI) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), + UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { CastInsts.insert(CI.begin(), CI.end()); } @@ -200,8 +198,6 @@ public: MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; } - FastMathFlags getFastMathFlags() { return FMF; } - TrackingVH getRecurrenceStartValue() { return StartValue; } Instruction *getLoopExitInstr() { return LoopExitInstr; } @@ -241,9 +237,6 @@ private: Instruction *LoopExitInstr = nullptr; // The kind of the recurrence. RecurrenceKind Kind = RK_NoRecurrence; - // The fast-math flags on the recurrent instructions. We propagate these - // fast-math flags into the vectorized FP instructions we generate. - FastMathFlags FMF; // If this a min/max recurrence the kind of recurrence. MinMaxRecurrenceKind MinMaxKind = MRK_Invalid; // First occurrence of unasfe algebra in the PHI's use-chain. diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index 8199c65ca8a0..fefb13c66d45 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -187,12 +187,6 @@ public: FastMathFlags() = default; - static FastMathFlags getFast() { - FastMathFlags FMF; - FMF.setFast(); - return FMF; - } - bool any() const { return Flags != 0; } bool none() const { return Flags == 0; } bool all() const { return Flags == ~0U; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index c724a3ed1057..f81b73672922 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -296,7 +296,6 @@ getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op, Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind = RecurrenceDescriptor::MRK_Invalid, - FastMathFlags FMF = FastMathFlags(), ArrayRef RedOps = None); /// Create a target reduction of the given vector. The reduction operation @@ -309,7 +308,6 @@ Value *createSimpleTargetReduction(IRBuilder<> &B, unsigned Opcode, Value *Src, TargetTransformInfo::ReductionFlags Flags = TargetTransformInfo::ReductionFlags(), - FastMathFlags FMF = FastMathFlags(), ArrayRef RedOps = None); /// Create a generic target reduction using a recurrence descriptor \p Desc diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 19f1a771b858..555e3c9ed457 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -251,10 +251,6 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Worklist.push_back(Start); VisitedInsts.insert(Start); - // Start with all flags set because we will intersect this with the reduction - // flags from all the reduction operations. - FastMathFlags FMF = FastMathFlags::getFast(); - // A value in the reduction can be used: // - By the reduction: // - Reduction operation: @@ -300,8 +296,6 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; - if (isa(ReduxDesc.getPatternInst())) - FMF &= ReduxDesc.getPatternInst()->getFastMathFlags(); } bool IsASelect = isa(Cur); @@ -447,7 +441,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, // Save the description of this reduction variable. RecurrenceDescriptor RD( - RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(), + RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); RedDes = RD; @@ -556,7 +550,7 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { Instruction *UAI = Prev.getUnsafeAlgebraInst(); - if (!UAI && isa(I) && !I->hasAllowReassoc()) + if (!UAI && isa(I) && !I->isFast()) UAI = I; // Found an unsafe (unvectorizable) algebra instruction. switch (I->getOpcode()) { diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index 34858883298e..f44d89156b6c 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -118,11 +118,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { } if (!TTI->shouldExpandReduction(II)) continue; - FastMathFlags FMF = - isa(II) ? II->getFastMathFlags() : FastMathFlags{}; Value *Rdx = IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) - : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK, FMF); + : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); II->replaceAllUsesWith(Rdx); II->eraseFromParent(); Changed = true; diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 045cb6284c4a..c3a026852445 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -671,9 +671,13 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, return true; } -static Value *addFastMathFlag(Value *V, FastMathFlags FMF) { - if (isa(V)) - cast(V)->setFastMathFlags(FMF); +/// Adds a 'fast' flag to floating point operations. +static Value *addFastMathFlag(Value *V) { + if (isa(V)) { + FastMathFlags Flags; + Flags.setFast(); + cast(V)->setFastMathFlags(Flags); + } return V; } @@ -757,7 +761,7 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, Value * llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, - FastMathFlags FMF, ArrayRef RedOps) { + ArrayRef RedOps) { unsigned VF = Src->getType()->getVectorNumElements(); // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles // and vector ops, reducing the set of values being computed by half each @@ -782,8 +786,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, if (Op != Instruction::ICmp && Op != Instruction::FCmp) { // Floating point operations had to be 'fast' to enable the reduction. TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, - TmpVec, Shuf, "bin.rdx"), - FMF); + TmpVec, Shuf, "bin.rdx")); } else { assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && "Invalid min/max"); @@ -800,7 +803,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, /// flags (if generating min/max reductions). Value *llvm::createSimpleTargetReduction( IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, - Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF, + Value *Src, TargetTransformInfo::ReductionFlags Flags, ArrayRef RedOps) { assert(isa(Src->getType()) && "Type must be a vector"); @@ -870,7 +873,7 @@ Value *llvm::createSimpleTargetReduction( } if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) return BuildFunc(); - return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps); + return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); } /// Create a vector reduction using a given recurrence descriptor. @@ -885,37 +888,28 @@ Value *llvm::createTargetReduction(IRBuilder<> &B, Flags.NoNaN = NoNaN; switch (RecKind) { case RD::RK_FloatAdd: - return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); case RD::RK_FloatMult: - return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); case RD::RK_IntegerAdd: - return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); case RD::RK_IntegerMult: - return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); case RD::RK_IntegerAnd: - return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); case RD::RK_IntegerOr: - return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); case RD::RK_IntegerXor: - return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); case RD::RK_IntegerMinMax: { RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); - return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); } case RD::RK_FloatMinMax: { Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; - return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); } default: llvm_unreachable("Unhandled RecKind"); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b57012291df5..b10ce86b6825 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -319,14 +319,11 @@ static unsigned getReciprocalPredBlockProb() { return 2; } /// A helper function that adds a 'fast' flag to floating-point operations. static Value *addFastMathFlag(Value *V) { - if (isa(V)) - cast(V)->setFastMathFlags(FastMathFlags::getFast()); - return V; -} - -static Value *addFastMathFlag(Value *V, FastMathFlags FMF) { - if (isa(V)) - cast(V)->setFastMathFlags(FMF); + if (isa(V)) { + FastMathFlags Flags; + Flags.setFast(); + cast(V)->setFastMathFlags(Flags); + } return V; } @@ -3615,8 +3612,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // Floating point operations had to be 'fast' to enable the reduction. ReducedPartRdx = addFastMathFlag( Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart, - ReducedPartRdx, "bin.rdx"), - RdxDesc.getFastMathFlags()); + ReducedPartRdx, "bin.rdx")); else ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx, RdxPart); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 28ed4e6ff1a9..0016e10567e8 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5929,8 +5929,7 @@ private: if (!IsPairwiseReduction) return createSimpleTargetReduction( Builder, TTI, ReductionData.getOpcode(), VectorizedValue, - ReductionData.getFlags(), FastMathFlags::getFast(), - ReductionOps.back()); + ReductionData.getFlags(), ReductionOps.back()); Value *TmpVec = VectorizedValue; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { diff --git a/llvm/test/Transforms/LoopVectorize/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/reduction-fastmath.ll deleted file mode 100644 index 1146e31ec258..000000000000 --- a/llvm/test/Transforms/LoopVectorize/reduction-fastmath.ll +++ /dev/null @@ -1,112 +0,0 @@ -; RUN: opt -S -loop-vectorize < %s | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define float @reduction_sum_float_ieee(i32 %n, float* %array) { -; CHECK-LABEL: define float @reduction_sum_float_ieee( -entry: - %entry.cond = icmp ne i32 0, 4096 - br i1 %entry.cond, label %loop, label %loop.exit - -loop: - %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ] - %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ] - %address = getelementptr float, float* %array, i32 %idx - %value = load float, float* %address - %sum.inc = fadd float %sum, %value - %idx.inc = add i32 %idx, 1 - %be.cond = icmp ne i32 %idx.inc, 4096 - br i1 %be.cond, label %loop, label %loop.exit - -loop.exit: - %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ] -; CHECK-NOT: %wide.load = load <4 x float>, <4 x float>* -; CHECK: ret float %sum.lcssa - ret float %sum.lcssa -} - -define float @reduction_sum_float_fastmath(i32 %n, float* %array) { -; CHECK-LABEL: define float @reduction_sum_float_fastmath( -; CHECK: fadd fast <4 x float> -; CHECK: fadd fast <4 x float> -; CHECK: fadd fast <4 x float> -; CHECK: fadd fast <4 x float> -; CHECK: fadd fast <4 x float> -entry: - %entry.cond = icmp ne i32 0, 4096 - br i1 %entry.cond, label %loop, label %loop.exit - -loop: - %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ] - %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ] - %address = getelementptr float, float* %array, i32 %idx - %value = load float, float* %address - %sum.inc = fadd fast float %sum, %value - %idx.inc = add i32 %idx, 1 - %be.cond = icmp ne i32 %idx.inc, 4096 - br i1 %be.cond, label %loop, label %loop.exit - -loop.exit: - %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ] -; CHECK: ret float %sum.lcssa - ret float %sum.lcssa -} - -define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) { -; CHECK-LABEL: define float @reduction_sum_float_only_reassoc( -; CHECK-NOT: fadd fast -; CHECK: fadd reassoc <4 x float> -; CHECK: fadd reassoc <4 x float> -; CHECK: fadd reassoc <4 x float> -; CHECK: fadd reassoc <4 x float> -; CHECK: fadd reassoc <4 x float> - -entry: - %entry.cond = icmp ne i32 0, 4096 - br i1 %entry.cond, label %loop, label %loop.exit - -loop: - %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ] - %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ] - %address = getelementptr float, float* %array, i32 %idx - %value = load float, float* %address - %sum.inc = fadd reassoc float %sum, %value - %idx.inc = add i32 %idx, 1 - %be.cond = icmp ne i32 %idx.inc, 4096 - br i1 %be.cond, label %loop, label %loop.exit - -loop.exit: - %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ] -; CHECK: ret float %sum.lcssa - ret float %sum.lcssa -} - -define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %array) { -; CHECK-LABEL: define float @reduction_sum_float_only_reassoc_and_contract( -; CHECK-NOT: fadd fast -; CHECK: fadd reassoc contract <4 x float> -; CHECK: fadd reassoc contract <4 x float> -; CHECK: fadd reassoc contract <4 x float> -; CHECK: fadd reassoc contract <4 x float> -; CHECK: fadd reassoc contract <4 x float> - -entry: - %entry.cond = icmp ne i32 0, 4096 - br i1 %entry.cond, label %loop, label %loop.exit - -loop: - %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ] - %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ] - %address = getelementptr float, float* %array, i32 %idx - %value = load float, float* %address - %sum.inc = fadd reassoc contract float %sum, %value - %idx.inc = add i32 %idx, 1 - %be.cond = icmp ne i32 %idx.inc, 4096 - br i1 %be.cond, label %loop, label %loop.exit - -loop.exit: - %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ] -; CHECK: ret float %sum.lcssa - ret float %sum.lcssa -}