forked from OSchip/llvm-project
Revert "Relax constraints for reduction vectorization"
This reverts commit r355868. Breaks hexagon. llvm-svn: 355873
This commit is contained in:
parent
a958d40e78
commit
2136a5bc49
|
@ -89,12 +89,10 @@ public:
|
|||
RecurrenceDescriptor() = default;
|
||||
|
||||
RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
|
||||
FastMathFlags FMF, MinMaxRecurrenceKind MK,
|
||||
Instruction *UAI, Type *RT, bool Signed,
|
||||
SmallPtrSetImpl<Instruction *> &CI)
|
||||
: StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
|
||||
MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
|
||||
IsSigned(Signed) {
|
||||
MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
|
||||
bool Signed, SmallPtrSetImpl<Instruction *> &CI)
|
||||
: StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
|
||||
UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
|
||||
CastInsts.insert(CI.begin(), CI.end());
|
||||
}
|
||||
|
||||
|
@ -200,8 +198,6 @@ public:
|
|||
|
||||
MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
|
||||
|
||||
FastMathFlags getFastMathFlags() { return FMF; }
|
||||
|
||||
TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
|
||||
|
||||
Instruction *getLoopExitInstr() { return LoopExitInstr; }
|
||||
|
@ -241,9 +237,6 @@ private:
|
|||
Instruction *LoopExitInstr = nullptr;
|
||||
// The kind of the recurrence.
|
||||
RecurrenceKind Kind = RK_NoRecurrence;
|
||||
// The fast-math flags on the recurrent instructions. We propagate these
|
||||
// fast-math flags into the vectorized FP instructions we generate.
|
||||
FastMathFlags FMF;
|
||||
// If this is a min/max recurrence, the kind of recurrence.
|
||||
MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
|
||||
// First occurrence of unsafe algebra in the PHI's use-chain.
|
||||
|
|
|
@ -187,12 +187,6 @@ public:
|
|||
|
||||
FastMathFlags() = default;
|
||||
|
||||
static FastMathFlags getFast() {
|
||||
FastMathFlags FMF;
|
||||
FMF.setFast();
|
||||
return FMF;
|
||||
}
|
||||
|
||||
bool any() const { return Flags != 0; }
|
||||
bool none() const { return Flags == 0; }
|
||||
bool all() const { return Flags == ~0U; }
|
||||
|
|
|
@ -296,7 +296,6 @@ getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op,
|
|||
Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
|
||||
RecurrenceDescriptor::MinMaxRecurrenceKind
|
||||
MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
|
||||
FastMathFlags FMF = FastMathFlags(),
|
||||
ArrayRef<Value *> RedOps = None);
|
||||
|
||||
/// Create a target reduction of the given vector. The reduction operation
|
||||
|
@ -309,7 +308,6 @@ Value *createSimpleTargetReduction(IRBuilder<> &B,
|
|||
unsigned Opcode, Value *Src,
|
||||
TargetTransformInfo::ReductionFlags Flags =
|
||||
TargetTransformInfo::ReductionFlags(),
|
||||
FastMathFlags FMF = FastMathFlags(),
|
||||
ArrayRef<Value *> RedOps = None);
|
||||
|
||||
/// Create a generic target reduction using a recurrence descriptor \p Desc
|
||||
|
|
|
@ -251,10 +251,6 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
|
|||
Worklist.push_back(Start);
|
||||
VisitedInsts.insert(Start);
|
||||
|
||||
// Start with all flags set because we will intersect this with the reduction
|
||||
// flags from all the reduction operations.
|
||||
FastMathFlags FMF = FastMathFlags::getFast();
|
||||
|
||||
// A value in the reduction can be used:
|
||||
// - By the reduction:
|
||||
// - Reduction operation:
|
||||
|
@ -300,8 +296,6 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
|
|||
ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
|
||||
if (!ReduxDesc.isRecurrence())
|
||||
return false;
|
||||
if (isa<FPMathOperator>(ReduxDesc.getPatternInst()))
|
||||
FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
|
||||
}
|
||||
|
||||
bool IsASelect = isa<SelectInst>(Cur);
|
||||
|
@ -447,7 +441,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
|
|||
|
||||
// Save the description of this reduction variable.
|
||||
RecurrenceDescriptor RD(
|
||||
RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(),
|
||||
RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
|
||||
ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
|
||||
RedDes = RD;
|
||||
|
||||
|
@ -556,7 +550,7 @@ RecurrenceDescriptor::InstDesc
|
|||
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
|
||||
InstDesc &Prev, bool HasFunNoNaNAttr) {
|
||||
Instruction *UAI = Prev.getUnsafeAlgebraInst();
|
||||
if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc())
|
||||
if (!UAI && isa<FPMathOperator>(I) && !I->isFast())
|
||||
UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
|
|
|
@ -118,11 +118,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
|
|||
}
|
||||
if (!TTI->shouldExpandReduction(II))
|
||||
continue;
|
||||
FastMathFlags FMF =
|
||||
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
|
||||
Value *Rdx =
|
||||
IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
|
||||
: getShuffleReduction(Builder, Vec, getOpcode(ID), MRK, FMF);
|
||||
: getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
|
||||
II->replaceAllUsesWith(Rdx);
|
||||
II->eraseFromParent();
|
||||
Changed = true;
|
||||
|
|
|
@ -671,9 +671,13 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
|
|||
return true;
|
||||
}
|
||||
|
||||
static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
|
||||
if (isa<FPMathOperator>(V))
|
||||
cast<Instruction>(V)->setFastMathFlags(FMF);
|
||||
/// Adds a 'fast' flag to floating point operations.
|
||||
static Value *addFastMathFlag(Value *V) {
|
||||
if (isa<FPMathOperator>(V)) {
|
||||
FastMathFlags Flags;
|
||||
Flags.setFast();
|
||||
cast<Instruction>(V)->setFastMathFlags(Flags);
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
|
@ -757,7 +761,7 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
|
|||
Value *
|
||||
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
|
||||
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
|
||||
FastMathFlags FMF, ArrayRef<Value *> RedOps) {
|
||||
ArrayRef<Value *> RedOps) {
|
||||
unsigned VF = Src->getType()->getVectorNumElements();
|
||||
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
|
||||
// and vector ops, reducing the set of values being computed by half each
|
||||
|
@ -782,8 +786,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
|
|||
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
|
||||
// Floating point operations had to be 'fast' to enable the reduction.
|
||||
TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
|
||||
TmpVec, Shuf, "bin.rdx"),
|
||||
FMF);
|
||||
TmpVec, Shuf, "bin.rdx"));
|
||||
} else {
|
||||
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
|
||||
"Invalid min/max");
|
||||
|
@ -800,7 +803,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
|
|||
/// flags (if generating min/max reductions).
|
||||
Value *llvm::createSimpleTargetReduction(
|
||||
IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
|
||||
Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF,
|
||||
Value *Src, TargetTransformInfo::ReductionFlags Flags,
|
||||
ArrayRef<Value *> RedOps) {
|
||||
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
|
||||
|
||||
|
@ -870,7 +873,7 @@ Value *llvm::createSimpleTargetReduction(
|
|||
}
|
||||
if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
|
||||
return BuildFunc();
|
||||
return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps);
|
||||
return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
|
||||
}
|
||||
|
||||
/// Create a vector reduction using a given recurrence descriptor.
|
||||
|
@ -885,37 +888,28 @@ Value *llvm::createTargetReduction(IRBuilder<> &B,
|
|||
Flags.NoNaN = NoNaN;
|
||||
switch (RecKind) {
|
||||
case RD::RK_FloatAdd:
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
|
||||
case RD::RK_FloatMult:
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
|
||||
case RD::RK_IntegerAdd:
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
|
||||
case RD::RK_IntegerMult:
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
|
||||
case RD::RK_IntegerAnd:
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
|
||||
case RD::RK_IntegerOr:
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
|
||||
case RD::RK_IntegerXor:
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
|
||||
case RD::RK_IntegerMinMax: {
|
||||
RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
|
||||
Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
|
||||
Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
|
||||
}
|
||||
case RD::RK_FloatMinMax: {
|
||||
Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags,
|
||||
Desc.getFastMathFlags());
|
||||
return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("Unhandled RecKind");
|
||||
|
|
|
@ -319,14 +319,11 @@ static unsigned getReciprocalPredBlockProb() { return 2; }
|
|||
|
||||
/// A helper function that adds a 'fast' flag to floating-point operations.
|
||||
static Value *addFastMathFlag(Value *V) {
|
||||
if (isa<FPMathOperator>(V))
|
||||
cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
|
||||
return V;
|
||||
}
|
||||
|
||||
static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
|
||||
if (isa<FPMathOperator>(V))
|
||||
cast<Instruction>(V)->setFastMathFlags(FMF);
|
||||
if (isa<FPMathOperator>(V)) {
|
||||
FastMathFlags Flags;
|
||||
Flags.setFast();
|
||||
cast<Instruction>(V)->setFastMathFlags(Flags);
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
|
@ -3615,8 +3612,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
|
|||
// Floating point operations had to be 'fast' to enable the reduction.
|
||||
ReducedPartRdx = addFastMathFlag(
|
||||
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
|
||||
ReducedPartRdx, "bin.rdx"),
|
||||
RdxDesc.getFastMathFlags());
|
||||
ReducedPartRdx, "bin.rdx"));
|
||||
else
|
||||
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
|
||||
RdxPart);
|
||||
|
|
|
@ -5929,8 +5929,7 @@ private:
|
|||
if (!IsPairwiseReduction)
|
||||
return createSimpleTargetReduction(
|
||||
Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
|
||||
ReductionData.getFlags(), FastMathFlags::getFast(),
|
||||
ReductionOps.back());
|
||||
ReductionData.getFlags(), ReductionOps.back());
|
||||
|
||||
Value *TmpVec = VectorizedValue;
|
||||
for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
|
||||
|
|
|
@ -1,112 +0,0 @@
|
|||
; RUN: opt -S -loop-vectorize < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define float @reduction_sum_float_ieee(i32 %n, float* %array) {
|
||||
; CHECK-LABEL: define float @reduction_sum_float_ieee(
|
||||
entry:
|
||||
%entry.cond = icmp ne i32 0, 4096
|
||||
br i1 %entry.cond, label %loop, label %loop.exit
|
||||
|
||||
loop:
|
||||
%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
|
||||
%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
|
||||
%address = getelementptr float, float* %array, i32 %idx
|
||||
%value = load float, float* %address
|
||||
%sum.inc = fadd float %sum, %value
|
||||
%idx.inc = add i32 %idx, 1
|
||||
%be.cond = icmp ne i32 %idx.inc, 4096
|
||||
br i1 %be.cond, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
|
||||
; CHECK-NOT: %wide.load = load <4 x float>, <4 x float>*
|
||||
; CHECK: ret float %sum.lcssa
|
||||
ret float %sum.lcssa
|
||||
}
|
||||
|
||||
define float @reduction_sum_float_fastmath(i32 %n, float* %array) {
|
||||
; CHECK-LABEL: define float @reduction_sum_float_fastmath(
|
||||
; CHECK: fadd fast <4 x float>
|
||||
; CHECK: fadd fast <4 x float>
|
||||
; CHECK: fadd fast <4 x float>
|
||||
; CHECK: fadd fast <4 x float>
|
||||
; CHECK: fadd fast <4 x float>
|
||||
entry:
|
||||
%entry.cond = icmp ne i32 0, 4096
|
||||
br i1 %entry.cond, label %loop, label %loop.exit
|
||||
|
||||
loop:
|
||||
%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
|
||||
%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
|
||||
%address = getelementptr float, float* %array, i32 %idx
|
||||
%value = load float, float* %address
|
||||
%sum.inc = fadd fast float %sum, %value
|
||||
%idx.inc = add i32 %idx, 1
|
||||
%be.cond = icmp ne i32 %idx.inc, 4096
|
||||
br i1 %be.cond, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
|
||||
; CHECK: ret float %sum.lcssa
|
||||
ret float %sum.lcssa
|
||||
}
|
||||
|
||||
define float @reduction_sum_float_only_reassoc(i32 %n, float* %array) {
|
||||
; CHECK-LABEL: define float @reduction_sum_float_only_reassoc(
|
||||
; CHECK-NOT: fadd fast
|
||||
; CHECK: fadd reassoc <4 x float>
|
||||
; CHECK: fadd reassoc <4 x float>
|
||||
; CHECK: fadd reassoc <4 x float>
|
||||
; CHECK: fadd reassoc <4 x float>
|
||||
; CHECK: fadd reassoc <4 x float>
|
||||
|
||||
entry:
|
||||
%entry.cond = icmp ne i32 0, 4096
|
||||
br i1 %entry.cond, label %loop, label %loop.exit
|
||||
|
||||
loop:
|
||||
%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
|
||||
%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
|
||||
%address = getelementptr float, float* %array, i32 %idx
|
||||
%value = load float, float* %address
|
||||
%sum.inc = fadd reassoc float %sum, %value
|
||||
%idx.inc = add i32 %idx, 1
|
||||
%be.cond = icmp ne i32 %idx.inc, 4096
|
||||
br i1 %be.cond, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
|
||||
; CHECK: ret float %sum.lcssa
|
||||
ret float %sum.lcssa
|
||||
}
|
||||
|
||||
define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, float* %array) {
|
||||
; CHECK-LABEL: define float @reduction_sum_float_only_reassoc_and_contract(
|
||||
; CHECK-NOT: fadd fast
|
||||
; CHECK: fadd reassoc contract <4 x float>
|
||||
; CHECK: fadd reassoc contract <4 x float>
|
||||
; CHECK: fadd reassoc contract <4 x float>
|
||||
; CHECK: fadd reassoc contract <4 x float>
|
||||
; CHECK: fadd reassoc contract <4 x float>
|
||||
|
||||
entry:
|
||||
%entry.cond = icmp ne i32 0, 4096
|
||||
br i1 %entry.cond, label %loop, label %loop.exit
|
||||
|
||||
loop:
|
||||
%idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
|
||||
%sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
|
||||
%address = getelementptr float, float* %array, i32 %idx
|
||||
%value = load float, float* %address
|
||||
%sum.inc = fadd reassoc contract float %sum, %value
|
||||
%idx.inc = add i32 %idx, 1
|
||||
%be.cond = icmp ne i32 %idx.inc, 4096
|
||||
br i1 %be.cond, label %loop, label %loop.exit
|
||||
|
||||
loop.exit:
|
||||
%sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
|
||||
; CHECK: ret float %sum.lcssa
|
||||
ret float %sum.lcssa
|
||||
}
|
Loading…
Reference in New Issue