2007-04-07 09:25:15 +08:00
|
|
|
//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2007-04-07 09:25:15 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements Loop Rotation Pass.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-05-04 06:02:31 +08:00
|
|
|
#include "llvm/Transforms/Scalar/LoopRotation.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2017-01-11 17:43:56 +08:00
|
|
|
#include "llvm/Analysis/InstructionSimplify.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Analysis/LoopPass.h"
|
2018-10-25 06:46:45 +08:00
|
|
|
#include "llvm/Analysis/MemorySSA.h"
|
|
|
|
#include "llvm/Analysis/MemorySSAUpdater.h"
|
2007-07-12 07:47:28 +08:00
|
|
|
#include "llvm/Analysis/ScalarEvolution.h"
|
2013-01-21 21:04:33 +08:00
|
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2016-05-04 06:02:31 +08:00
|
|
|
#include "llvm/Transforms/Scalar.h"
|
2017-01-11 17:43:56 +08:00
|
|
|
#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
2018-03-29 16:48:15 +08:00
|
|
|
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
|
[LPM] Factor all of the loop analysis usage updates into a common helper
routine.
We were getting this wrong in small ways and generally being very
inconsistent about it across loop passes. Instead, let's have a common
place where we do this. One minor downside is that this will require
some analyses like SCEV in more places than they are strictly needed.
However, this seems benign as these analyses are complete no-ops, and
without this consistency we can in many cases end up with the legacy
pass manager scheduling deciding to split up a loop pass pipeline in
order to run the function analysis half-way through. It is very, very
annoying to fix these without just being very pedantic across the board.
The only loop passes I've not updated here are ones that use
AU.setPreservesAll() such as IVUsers (an analysis) and the pass printer.
They seemed less relevant.
With this patch, almost all of the problems in PR24804 around loop pass
pipelines are fixed. The one remaining issue is that we run simplify-cfg
and instcombine in the middle of the loop pass pipeline. We've recently
added some loop variants of these passes that would seem substantially
cleaner to use, but this at least gets us much closer to the previous
state. Notably, the number of loop pass managers is down from seven to three.
I've not updated the loop passes using LoopAccessAnalysis because that
analysis hasn't been fully wired into LoopSimplify/LCSSA, and it isn't
clear that those transforms want to support those forms anyways. They
all run late anyways, so this is harmless. Similarly, LSR is left alone
because it already carefully manages its forms and doesn't need to get
fused into a single loop pass manager with a bunch of other loop passes.
LoopReroll didn't use loop simplified form previously, and I've updated
the test case to match the trivially different output.
Finally, I've also factored all the pass initialization for the passes
that use this technique as well, so that should be done regularly and
reliably.
Thanks to James for the help reviewing and thinking about this stuff,
and Ben for help thinking about it as well!
Differential Revision: http://reviews.llvm.org/D17435
llvm-svn: 261316
2016-02-19 18:45:18 +08:00
|
|
|
#include "llvm/Transforms/Utils/LoopUtils.h"
|
2007-04-07 09:25:15 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2014-04-22 10:55:47 +08:00
|
|
|
#define DEBUG_TYPE "loop-rotate"
|
|
|
|
|
2016-06-14 22:44:05 +08:00
|
|
|
/// Hidden command-line option "rotation-max-header-size" (initial value 16).
/// Both pass wrappers in this file read it as the header-size limit whenever
/// the caller has not supplied an explicit one.
static cl::opt<unsigned>
    DefaultRotationThreshold("rotation-max-header-size", cl::init(16),
                             cl::Hidden,
                             cl::desc("The default maximum header size for automatic loop rotation"));
|
2007-04-07 09:25:15 +08:00
|
|
|
|
2016-12-22 14:59:15 +08:00
|
|
|
/// Build the new-pass-manager loop rotation pass.
///
/// \param EnableHeaderDuplication stored as-is; run() consults it to pick the
/// header-size threshold.
LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
    : EnableHeaderDuplication(EnableHeaderDuplication) {}
|
2016-05-04 06:02:31 +08:00
|
|
|
|
2017-01-11 14:23:21 +08:00
|
|
|
/// New-pass-manager entry point: attempt to rotate loop \p L.
///
/// The header-size threshold is DefaultRotationThreshold when header
/// duplication was enabled at construction and 0 otherwise. A MemorySSAUpdater
/// is created and passed to LoopRotation only when \p AR carries MemorySSA.
///
/// \returns PreservedAnalyses::all() if LoopRotation reports no change;
/// otherwise the set from getLoopPassPreservedAnalyses(), additionally marking
/// MemorySSAAnalysis preserved when MemorySSA is present.
PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
                                      LoopStandardAnalysisResults &AR,
                                      LPMUpdater &) {
  // Threshold stays 0 unless header duplication is enabled.
  int Threshold = 0;
  if (EnableHeaderDuplication)
    Threshold = DefaultRotationThreshold;

  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
  const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);

  // The updater only exists when MemorySSA is available; LoopRotation receives
  // a null pointer otherwise.
  Optional<MemorySSAUpdater> MSSAU;
  if (AR.MSSA)
    MSSAU = MemorySSAUpdater(AR.MSSA);
  MemorySSAUpdater *UpdaterPtr =
      MSSAU.hasValue() ? MSSAU.getPointer() : nullptr;

  const bool Rotated =
      LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, UpdaterPtr, SQ,
                   false, Threshold, false);
  if (!Rotated)
    return PreservedAnalyses::all();

  // Optional self-check of MemorySSA after the transformation.
  if (AR.MSSA && VerifyMemorySSA)
    AR.MSSA->verifyMemorySSA();

  auto Preserved = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    Preserved.preserve<MemorySSAAnalysis>();
  return Preserved;
}
|
|
|
|
|
2015-12-15 07:22:48 +08:00
|
|
|
namespace {
|
|
|
|
|
2016-05-04 06:02:31 +08:00
|
|
|
class LoopRotateLegacyPass : public LoopPass {
|
2015-12-15 07:22:48 +08:00
|
|
|
unsigned MaxHeaderSize;
|
|
|
|
|
|
|
|
public:
|
|
|
|
static char ID; // Pass ID, replacement for typeid
|
2016-05-04 06:02:31 +08:00
|
|
|
LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
|
|
|
|
initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
|
2015-12-15 07:22:48 +08:00
|
|
|
if (SpecifiedMaxHeaderSize == -1)
|
|
|
|
MaxHeaderSize = DefaultRotationThreshold;
|
|
|
|
else
|
|
|
|
MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
// LCSSA form makes instruction renaming easier.
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
2016-12-19 16:22:17 +08:00
|
|
|
AU.addRequired<AssumptionCacheTracker>();
|
2015-12-15 07:22:48 +08:00
|
|
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
2018-10-25 06:46:45 +08:00
|
|
|
if (EnableMSSALoopDependency) {
|
|
|
|
AU.addRequired<MemorySSAWrapperPass>();
|
|
|
|
AU.addPreserved<MemorySSAWrapperPass>();
|
|
|
|
}
|
[LPM] Factor all of the loop analysis usage updates into a common helper
routine.
We were getting this wrong in small ways and generally being very
inconsistent about it across loop passes. Instead, let's have a common
place where we do this. One minor downside is that this will require
some analyses like SCEV in more places than they are strictly needed.
However, this seems benign as these analyses are complete no-ops, and
without this consistency we can in many cases end up with the legacy
pass manager scheduling deciding to split up a loop pass pipeline in
order to run the function analysis half-way through. It is very, very
annoying to fix these without just being very pedantic across the board.
The only loop passes I've not updated here are ones that use
AU.setPreservesAll() such as IVUsers (an analysis) and the pass printer.
They seemed less relevant.
With this patch, almost all of the problems in PR24804 around loop pass
pipelines are fixed. The one remaining issue is that we run simplify-cfg
and instcombine in the middle of the loop pass pipeline. We've recently
added some loop variants of these passes that would seem substantially
cleaner to use, but this at least gets us much closer to the previous
state. Notably, the seven loop pass managers is down to three.
I've not updated the loop passes using LoopAccessAnalysis because that
analysis hasn't been fully wired into LoopSimplify/LCSSA, and it isn't
clear that those transforms want to support those forms anyways. They
all run late anyways, so this is harmless. Similarly, LSR is left alone
because it already carefully manages its forms and doesn't need to get
fused into a single loop pass manager with a bunch of other loop passes.
LoopReroll didn't use loop simplified form previously, and I've updated
the test case to match the trivially different output.
Finally, I've also factored all the pass initialization for the passes
that use this technique as well, so that should be done regularly and
reliably.
Thanks to James for the help reviewing and thinking about this stuff,
and Ben for help thinking about it as well!
Differential Revision: http://reviews.llvm.org/D17435
llvm-svn: 261316
2016-02-19 18:45:18 +08:00
|
|
|
getLoopAnalysisUsage(AU);
|
2015-12-15 07:22:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
|
2016-04-23 06:06:11 +08:00
|
|
|
if (skipLoop(L))
|
2015-12-15 07:22:48 +08:00
|
|
|
return false;
|
|
|
|
Function &F = *L->getHeader()->getParent();
|
|
|
|
|
|
|
|
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
|
|
|
const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
2016-12-19 16:22:17 +08:00
|
|
|
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
|
2019-10-08 19:54:42 +08:00
|
|
|
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
2019-10-08 16:46:38 +08:00
|
|
|
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
2017-04-29 06:05:55 +08:00
|
|
|
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
|
2018-10-25 06:46:45 +08:00
|
|
|
Optional<MemorySSAUpdater> MSSAU;
|
|
|
|
if (EnableMSSALoopDependency) {
|
|
|
|
MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
|
|
|
|
MSSAU = MemorySSAUpdater(MSSA);
|
|
|
|
}
|
2019-10-08 19:54:42 +08:00
|
|
|
return LoopRotation(L, LI, TTI, AC, &DT, &SE,
|
2018-10-25 06:46:45 +08:00
|
|
|
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
|
|
|
|
false, MaxHeaderSize, false);
|
2015-12-15 07:22:48 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2016-05-04 06:02:31 +08:00
|
|
|
char LoopRotateLegacyPass::ID = 0;
|
|
|
|
INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
|
|
|
|
false, false)
|
2016-12-19 16:22:17 +08:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
[LPM] Factor all of the loop analysis usage updates into a common helper
routine.
We were getting this wrong in small ways and generally being very
inconsistent about it across loop passes. Instead, let's have a common
place where we do this. One minor downside is that this will require
some analyses like SCEV in more places than they are strictly needed.
However, this seems benign as these analyses are complete no-ops, and
without this consistency we can in many cases end up with the legacy
pass manager scheduling deciding to split up a loop pass pipeline in
order to run the function analysis half-way through. It is very, very
annoying to fix these without just being very pedantic across the board.
The only loop passes I've not updated here are ones that use
AU.setPreservesAll() such as IVUsers (an analysis) and the pass printer.
They seemed less relevant.
With this patch, almost all of the problems in PR24804 around loop pass
pipelines are fixed. The one remaining issue is that we run simplify-cfg
and instcombine in the middle of the loop pass pipeline. We've recently
added some loop variants of these passes that would seem substantially
cleaner to use, but this at least gets us much closer to the previous
state. Notably, the seven loop pass managers is down to three.
I've not updated the loop passes using LoopAccessAnalysis because that
analysis hasn't been fully wired into LoopSimplify/LCSSA, and it isn't
clear that those transforms want to support those forms anyways. They
all run late anyways, so this is harmless. Similarly, LSR is left alone
because it already carefully manages its forms and doesn't need to get
fused into a single loop pass manager with a bunch of other loop passes.
LoopReroll didn't use loop simplified form previously, and I've updated
the test case to match the trivially different output.
Finally, I've also factored all the pass initialization for the passes
that use this technique as well, so that should be done regularly and
reliably.
Thanks to James for the help reviewing and thinking about this stuff,
and Ben for help thinking about it as well!
Differential Revision: http://reviews.llvm.org/D17435
llvm-svn: 261316
2016-02-19 18:45:18 +08:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(LoopPass)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
2018-10-25 06:46:45 +08:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
|
2016-06-14 22:44:05 +08:00
|
|
|
INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
|
|
|
|
false)
|
2015-12-15 07:22:48 +08:00
|
|
|
|
|
|
|
/// Factory for the legacy loop rotation pass.
///
/// \param MaxHeaderSize forwarded unchanged to the LoopRotateLegacyPass
/// constructor, where -1 selects the command-line default.
/// \returns a newly heap-allocated pass.
// NOTE(review): presumably the pass manager the result is added to takes
// ownership — confirm against callers.
Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
  return new LoopRotateLegacyPass(MaxHeaderSize);
}
|