forked from OSchip/llvm-project
[LoopDist] Add llvm.loop.distribute.enable loop metadata
Summary: D19403 adds a new pragma for loop distribution. This change adds support for the corresponding metadata that the pragma is translated to by the FE. As part of this I had to rethink the flag -enable-loop-distribute. My goal was to be backward compatible with the existing behavior: A1. pass is off by default from the optimization pipeline unless -enable-loop-distribute is specified A2. pass is on when invoked directly from opt (e.g. for unit-testing) The new pragma/metadata overrides these defaults so the new behavior is: B1. A1 + enable distribution for individual loops with the pragma/metadata B2. A2 + disable distribution for individual loops with the pragma/metadata The default value for whether the pass is on or off comes from the initiator of the pass. From the PassManagerBuilder the default is off, from opt it's on. I moved -enable-loop-distribute under the pass. If the flag is specified it overrides the default from above. Then the pragma/metadata can further modify this per loop. As a side-effect, we can now also use -enable-loop-distribute=0 from opt to emulate the default from the optimization pipeline. So to be precise this is the new behavior: C1. pass is off by default from the optimization pipeline unless -enable-loop-distribute or the pragma/metadata enables it C2. pass is on when invoked directly from opt unless -enable-loop-distribute=0 or the pragma/metadata disables it Reviewers: hfinkel Subscribers: joker.eph, mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D19431 llvm-svn: 267672
This commit is contained in:
parent
08efb0efcd
commit
d2fa414718
|
@ -4711,6 +4711,27 @@ which is the string ``llvm.loop.licm_versioning.disable``. For example:
|
|||
|
||||
!0 = !{!"llvm.loop.licm_versioning.disable"}
|
||||
|
||||
'``llvm.loop.distribute.enable``' Metadata
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Loop distribution allows splitting a loop into multiple loops. Currently,
|
||||
this is only performed if the entire loop cannot be vectorized due to unsafe
|
||||
memory dependencies. The transformation will attempt to isolate the unsafe
|
||||
dependencies into their own loop.
|
||||
|
||||
This metadata can be used to selectively enable or disable distribution of the
|
||||
loop. The first operand is the string ``llvm.loop.distribute.enable`` and the
|
||||
second operand is a bit. If the bit operand value is 1 distribution is
|
||||
enabled. A value of 0 disables distribution:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
!0 = !{!"llvm.loop.distribute.enable", i1 0}
|
||||
!1 = !{!"llvm.loop.distribute.enable", i1 1}
|
||||
|
||||
This metadata should be used in conjunction with ``llvm.loop`` loop
|
||||
identification metadata.
|
||||
|
||||
'``llvm.mem``'
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
|
|
|
@ -479,7 +479,10 @@ FunctionPass *createNaryReassociatePass();
|
|||
//
|
||||
// LoopDistribute - Distribute loops.
|
||||
//
|
||||
FunctionPass *createLoopDistributePass();
|
||||
// ProcessAllLoopsByDefault instructs the pass to look for distribution
|
||||
// opportunities in all loops unless -enable-loop-distribute or the
|
||||
// llvm.loop.distribute.enable metadata overrides this default.
|
||||
FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
|
|
@ -95,10 +95,6 @@ static cl::opt<bool> EnableLoopInterchange(
|
|||
"enable-loopinterchange", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable the new, experimental LoopInterchange Pass"));
|
||||
|
||||
static cl::opt<bool> EnableLoopDistribute(
|
||||
"enable-loop-distribute", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable the new, experimental LoopDistribution Pass"));
|
||||
|
||||
static cl::opt<bool> EnableNonLTOGlobalsModRef(
|
||||
"enable-non-lto-gmr", cl::init(true), cl::Hidden,
|
||||
cl::desc(
|
||||
|
@ -480,9 +476,10 @@ void PassManagerBuilder::populateModulePassManager(
|
|||
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
|
||||
|
||||
// Distribute loops to allow partial vectorization. I.e. isolate dependences
|
||||
// into separate loop that would otherwise inhibit vectorization.
|
||||
if (EnableLoopDistribute)
|
||||
MPM.add(createLoopDistributePass());
|
||||
// into separate loop that would otherwise inhibit vectorization. This is
|
||||
// currently only performed for loops marked with the metadata
|
||||
// llvm.loop.distribute.enable=true or when -enable-loop-distribute is specified.
|
||||
MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false));
|
||||
|
||||
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
|
||||
|
||||
|
|
|
@ -60,6 +60,19 @@ static cl::opt<unsigned> DistributeSCEVCheckThreshold(
|
|||
cl::desc("The maximum number of SCEV checks allowed for Loop "
|
||||
"Distribution"));
|
||||
|
||||
static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
|
||||
"loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
|
||||
cl::Hidden,
|
||||
cl::desc(
|
||||
"The maximum number of SCEV checks allowed for Loop "
|
||||
"Distribution for loop marked with #pragma loop distribute(enable)"));
|
||||
|
||||
// Note that the initial value for this depends on whether the pass is invoked
|
||||
// directly or from the optimization pipeline.
|
||||
static cl::opt<bool> EnableLoopDistribute(
|
||||
"enable-loop-distribute", cl::Hidden,
|
||||
cl::desc("Enable the new, experimental LoopDistribution Pass"));
|
||||
|
||||
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
|
||||
|
||||
namespace {
|
||||
|
@ -576,7 +589,9 @@ class LoopDistributeForLoop {
|
|||
public:
|
||||
LoopDistributeForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
|
||||
DominatorTree *DT, ScalarEvolution *SE)
|
||||
: L(L), LI(LI), LAI(LAI), DT(DT), SE(SE) {}
|
||||
: L(L), LI(LI), LAI(LAI), DT(DT), SE(SE) {
|
||||
setForced();
|
||||
}
|
||||
|
||||
/// \brief Try to distribute an inner-most loop.
|
||||
bool processLoop() {
|
||||
|
@ -683,7 +698,9 @@ public:
|
|||
|
||||
// Don't distribute the loop if we need too many SCEV run-time checks.
|
||||
const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
|
||||
if (Pred.getComplexity() > DistributeSCEVCheckThreshold) {
|
||||
if (Pred.getComplexity() > (IsForced.getValueOr(false)
|
||||
? PragmaDistributeSCEVCheckThreshold
|
||||
: DistributeSCEVCheckThreshold)) {
|
||||
DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
|
||||
return false;
|
||||
}
|
||||
|
@ -735,6 +752,13 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
/// \brief Return whether distribution is forced to be enabled/disabled for the loop.
|
||||
///
|
||||
/// If the optional has a value, it indicates whether distribution was forced
|
||||
/// to be enabled (true) or disabled (false). If the optional has no value
|
||||
/// distribution was not forced either way.
|
||||
const Optional<bool> &isForced() const { return IsForced; }
|
||||
|
||||
private:
|
||||
/// \brief Filter out checks between pointers from the same partition.
|
||||
///
|
||||
|
@ -775,18 +799,47 @@ private:
|
|||
return Checks;
|
||||
}
|
||||
|
||||
/// \brief Check whether the loop metadata is forcing distribution to be
|
||||
/// enabled/disabled.
|
||||
void setForced() {
|
||||
Optional<const MDOperand *> Value =
|
||||
findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
|
||||
if (!Value)
|
||||
return;
|
||||
|
||||
const MDOperand *Op = *Value;
|
||||
assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
|
||||
IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
|
||||
}
|
||||
|
||||
// Analyses used.
|
||||
Loop *L;
|
||||
LoopInfo *LI;
|
||||
const LoopAccessInfo &LAI;
|
||||
DominatorTree *DT;
|
||||
ScalarEvolution *SE;
|
||||
|
||||
/// \brief Indicates whether distribution is forced to be enabled/disabled for
|
||||
/// the loop.
|
||||
///
|
||||
/// If the optional has a value, it indicates whether distribution was forced
|
||||
/// to be enabled (true) or disabled (false). If the optional has no value
|
||||
/// distribution was not forced either way.
|
||||
Optional<bool> IsForced;
|
||||
};
|
||||
|
||||
/// \brief The pass class.
|
||||
class LoopDistribute : public FunctionPass {
|
||||
public:
|
||||
LoopDistribute() : FunctionPass(ID) {
|
||||
/// \p ProcessAllLoopsByDefault specifies whether loop distribution should be
|
||||
/// performed by default. Pass -enable-loop-distribute={0,1} overrides this
|
||||
/// default. We use this to keep LoopDistribution off by default when invoked
|
||||
/// from the optimization pipeline but on when invoked explicitly from opt.
|
||||
LoopDistribute(bool ProcessAllLoopsByDefault = true)
|
||||
: FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) {
|
||||
// The default is set by the caller.
|
||||
if (EnableLoopDistribute.getNumOccurrences() > 0)
|
||||
ProcessAllLoops = EnableLoopDistribute;
|
||||
initializeLoopDistributePass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
@ -812,7 +865,11 @@ public:
|
|||
for (Loop *L : Worklist) {
|
||||
const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
|
||||
LoopDistributeForLoop LDL(L, LI, LAI, DT, SE);
|
||||
Changed |= LDL.processLoop();
|
||||
|
||||
// If distribution was forced for the specific loop to be
|
||||
// enabled/disabled, follow that. Otherwise use the global flag.
|
||||
if (LDL.isForced().getValueOr(ProcessAllLoops))
|
||||
Changed |= LDL.processLoop();
|
||||
}
|
||||
|
||||
// Process each loop nest in the function.
|
||||
|
@ -829,6 +886,11 @@ public:
|
|||
}
|
||||
|
||||
static char ID;
|
||||
|
||||
private:
|
||||
/// \brief Whether distribution should be on in this function. The per-loop
|
||||
/// pragma can override this.
|
||||
bool ProcessAllLoops;
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
|
@ -843,5 +905,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
|||
INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createLoopDistributePass() { return new LoopDistribute(); }
|
||||
FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) {
|
||||
return new LoopDistribute(ProcessAllLoopsByDefault);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
; RUN: opt -basicaa -loop-distribute -enable-loop-distribute=0 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=EXPLICIT --check-prefix=DEFAULT_OFF
|
||||
; RUN: opt -basicaa -loop-distribute -enable-loop-distribute=1 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=EXPLICIT --check-prefix=DEFAULT_ON
|
||||
|
||||
; Same loop as in basic.ll. Check that distribution is enabled/disabled
|
||||
; properly according to -enable-loop-distribute=0/1 and the
|
||||
; llvm.loop.distribute.enable metadata.
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.10.0"
|
||||
|
||||
; CHECK-LABEL: @explicit_on(
|
||||
define void @explicit_on(i32* noalias %a,
|
||||
i32* noalias %b,
|
||||
i32* noalias %c,
|
||||
i32* noalias %d,
|
||||
i32* noalias %e) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
; EXPLICIT: for.body.ldist1:
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
||||
|
||||
%arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
|
||||
%loadA = load i32, i32* %arrayidxA, align 4
|
||||
|
||||
%arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
|
||||
%loadB = load i32, i32* %arrayidxB, align 4
|
||||
|
||||
%mulA = mul i32 %loadB, %loadA
|
||||
|
||||
%add = add nuw nsw i64 %ind, 1
|
||||
%arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
|
||||
store i32 %mulA, i32* %arrayidxA_plus_4, align 4
|
||||
|
||||
%arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
|
||||
%loadD = load i32, i32* %arrayidxD, align 4
|
||||
|
||||
%arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
|
||||
%loadE = load i32, i32* %arrayidxE, align 4
|
||||
|
||||
%mulC = mul i32 %loadD, %loadE
|
||||
|
||||
%arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
|
||||
store i32 %mulC, i32* %arrayidxC, align 4
|
||||
|
||||
%exitcond = icmp eq i64 %add, 20
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @explicit_off(
|
||||
define void @explicit_off(i32* noalias %a,
|
||||
i32* noalias %b,
|
||||
i32* noalias %c,
|
||||
i32* noalias %d,
|
||||
i32* noalias %e) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
; EXPLICIT-NOT: for.body.ldist1:
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
||||
|
||||
%arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
|
||||
%loadA = load i32, i32* %arrayidxA, align 4
|
||||
|
||||
%arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
|
||||
%loadB = load i32, i32* %arrayidxB, align 4
|
||||
|
||||
%mulA = mul i32 %loadB, %loadA
|
||||
|
||||
%add = add nuw nsw i64 %ind, 1
|
||||
%arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
|
||||
store i32 %mulA, i32* %arrayidxA_plus_4, align 4
|
||||
|
||||
%arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
|
||||
%loadD = load i32, i32* %arrayidxD, align 4
|
||||
|
||||
%arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
|
||||
%loadE = load i32, i32* %arrayidxE, align 4
|
||||
|
||||
%mulC = mul i32 %loadD, %loadE
|
||||
|
||||
%arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
|
||||
store i32 %mulC, i32* %arrayidxC, align 4
|
||||
|
||||
%exitcond = icmp eq i64 %add, 20
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @default_distribute(
|
||||
define void @default_distribute(i32* noalias %a,
|
||||
i32* noalias %b,
|
||||
i32* noalias %c,
|
||||
i32* noalias %d,
|
||||
i32* noalias %e) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
; Verify the two distributed loops.
|
||||
|
||||
; DEFAULT_ON: for.body.ldist1:
|
||||
; DEFAULT_OFF-NOT: for.body.ldist1:
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
||||
|
||||
%arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
|
||||
%loadA = load i32, i32* %arrayidxA, align 4
|
||||
|
||||
%arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
|
||||
%loadB = load i32, i32* %arrayidxB, align 4
|
||||
|
||||
%mulA = mul i32 %loadB, %loadA
|
||||
|
||||
%add = add nuw nsw i64 %ind, 1
|
||||
%arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
|
||||
store i32 %mulA, i32* %arrayidxA_plus_4, align 4
|
||||
|
||||
%arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
|
||||
%loadD = load i32, i32* %arrayidxD, align 4
|
||||
|
||||
%arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
|
||||
%loadE = load i32, i32* %arrayidxE, align 4
|
||||
|
||||
%mulC = mul i32 %loadD, %loadE
|
||||
|
||||
%arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
|
||||
store i32 %mulC, i32* %arrayidxC, align 4
|
||||
|
||||
%exitcond = icmp eq i64 %add, 20
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = distinct !{!0, !1}
|
||||
!1 = !{!"llvm.loop.distribute.enable", i1 true}
|
||||
!2 = distinct !{!2, !3}
|
||||
!3 = !{!"llvm.loop.distribute.enable", i1 false}
|
Loading…
Reference in New Issue