2018-05-25 09:32:36 +08:00
|
|
|
//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This pass performs lightweight instruction simplification on loop bodies.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
|
|
|
|
#include "llvm/ADT/PointerIntPair.h"
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
|
|
|
#include "llvm/Analysis/AssumptionCache.h"
|
|
|
|
#include "llvm/Analysis/InstructionSimplify.h"
|
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
#include "llvm/Analysis/LoopIterator.h"
|
2018-05-25 09:32:36 +08:00
|
|
|
#include "llvm/Analysis/LoopPass.h"
|
2018-08-23 04:05:21 +08:00
|
|
|
#include "llvm/Analysis/MemorySSA.h"
|
|
|
|
#include "llvm/Analysis/MemorySSAUpdater.h"
|
2018-05-25 09:32:36 +08:00
|
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/CFG.h"
|
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/Dominators.h"
|
|
|
|
#include "llvm/IR/Instruction.h"
|
|
|
|
#include "llvm/IR/Instructions.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
#include "llvm/IR/PassManager.h"
|
|
|
|
#include "llvm/IR/User.h"
|
|
|
|
#include "llvm/Pass.h"
|
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Transforms/Scalar.h"
|
2018-08-23 04:05:21 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2018-05-25 09:32:36 +08:00
|
|
|
#include "llvm/Transforms/Utils/LoopUtils.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "loop-instsimplify"
|
|
|
|
|
|
|
|
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
|
2018-08-23 04:05:21 +08:00
|
|
|
AssumptionCache &AC, const TargetLibraryInfo &TLI,
|
|
|
|
MemorySSAUpdater *MSSAU) {
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
|
|
|
|
SimplifyQuery SQ(DL, &TLI, &DT, &AC);
|
|
|
|
|
|
|
|
// On the first pass over the loop body we try to simplify every instruction.
|
|
|
|
// On subsequent passes, we can restrict this to only simplifying instructions
|
|
|
|
// where the inputs have been updated. We end up needing two sets: one
|
|
|
|
// containing the instructions we are simplifying in *this* pass, and one for
|
|
|
|
// the instructions we will want to simplify in the *next* pass. We use
|
|
|
|
// pointers so we can swap between two stably allocated sets.
|
2018-05-25 09:32:36 +08:00
|
|
|
SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
// Track the PHI nodes that have already been visited during each iteration so
|
|
|
|
// that we can identify when it is necessary to iterate.
|
|
|
|
SmallPtrSet<PHINode *, 4> VisitedPHIs;
|
2018-05-25 09:32:36 +08:00
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
// While simplifying we may discover dead code or cause code to become dead.
|
|
|
|
// Keep track of all such instructions and we will delete them at the end.
|
|
|
|
SmallVector<Instruction *, 8> DeadInsts;
|
2018-05-25 09:32:36 +08:00
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
// First we want to create an RPO traversal of the loop body. By processing in
|
|
|
|
// RPO we can ensure that definitions are processed prior to uses (for non PHI
|
|
|
|
// uses) in all cases. This ensures we maximize the simplifications in each
|
|
|
|
// iteration over the loop and minimizes the possible causes for continuing to
|
|
|
|
// iterate.
|
|
|
|
LoopBlocksRPO RPOT(&L);
|
|
|
|
RPOT.perform(&LI);
|
2018-08-23 04:05:21 +08:00
|
|
|
MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
|
2018-05-25 09:32:36 +08:00
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
bool Changed = false;
|
|
|
|
for (;;) {
|
2018-08-23 04:05:21 +08:00
|
|
|
if (MSSAU && VerifyMemorySSA)
|
|
|
|
MSSA->verifyMemorySSA();
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
for (BasicBlock *BB : RPOT) {
|
|
|
|
for (Instruction &I : *BB) {
|
|
|
|
if (auto *PI = dyn_cast<PHINode>(&I))
|
|
|
|
VisitedPHIs.insert(PI);
|
|
|
|
|
|
|
|
if (I.use_empty()) {
|
|
|
|
if (isInstructionTriviallyDead(&I, &TLI))
|
|
|
|
DeadInsts.push_back(&I);
|
2018-05-25 09:32:36 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
// We special case the first iteration which we can detect due to the
|
|
|
|
// empty `ToSimplify` set.
|
|
|
|
bool IsFirstIteration = ToSimplify->empty();
|
2018-05-25 09:32:36 +08:00
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
if (!IsFirstIteration && !ToSimplify->count(&I))
|
2018-05-25 09:32:36 +08:00
|
|
|
continue;
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I));
|
|
|
|
if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
|
2018-05-25 09:32:36 +08:00
|
|
|
continue;
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
|
|
|
|
UI != UE;) {
|
|
|
|
Use &U = *UI++;
|
|
|
|
auto *UserI = cast<Instruction>(U.getUser());
|
|
|
|
U.set(V);
|
|
|
|
|
|
|
|
// If the instruction is used by a PHI node we have already processed
|
|
|
|
// we'll need to iterate on the loop body to converge, so add it to
|
|
|
|
// the next set.
|
|
|
|
if (auto *UserPI = dyn_cast<PHINode>(UserI))
|
|
|
|
if (VisitedPHIs.count(UserPI)) {
|
|
|
|
Next->insert(UserPI);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we are only simplifying targeted instructions and the user is an
|
|
|
|
// instruction in the loop body, add it to our set of targeted
|
|
|
|
// instructions. Because we process defs before uses (outside of PHIs)
|
|
|
|
// we won't have visited it yet.
|
|
|
|
//
|
|
|
|
// We also skip any uses outside of the loop being simplified. Those
|
|
|
|
// should always be PHI nodes due to LCSSA form, and we don't want to
|
|
|
|
// try to simplify those away.
|
|
|
|
assert((L.contains(UserI) || isa<PHINode>(UserI)) &&
|
|
|
|
"Uses outside the loop should be PHI nodes due to LCSSA!");
|
|
|
|
if (!IsFirstIteration && L.contains(UserI))
|
|
|
|
ToSimplify->insert(UserI);
|
|
|
|
}
|
2018-05-25 09:32:36 +08:00
|
|
|
|
2018-08-23 04:05:21 +08:00
|
|
|
if (MSSAU)
|
|
|
|
if (Instruction *SimpleI = dyn_cast_or_null<Instruction>(V))
|
|
|
|
if (MemoryAccess *MA = MSSA->getMemoryAccess(&I))
|
|
|
|
if (MemoryAccess *ReplacementMA = MSSA->getMemoryAccess(SimpleI))
|
|
|
|
MA->replaceAllUsesWith(ReplacementMA);
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
assert(I.use_empty() && "Should always have replaced all uses!");
|
|
|
|
if (isInstructionTriviallyDead(&I, &TLI))
|
|
|
|
DeadInsts.push_back(&I);
|
|
|
|
++NumSimplified;
|
|
|
|
Changed = true;
|
2018-05-25 09:32:36 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
// Delete any dead instructions found thus far now that we've finished an
|
|
|
|
// iteration over all instructions in all the loop blocks.
|
|
|
|
if (!DeadInsts.empty()) {
|
|
|
|
Changed = true;
|
2018-08-23 04:05:21 +08:00
|
|
|
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU);
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
}
|
|
|
|
|
2018-08-23 04:05:21 +08:00
|
|
|
if (MSSAU && VerifyMemorySSA)
|
|
|
|
MSSA->verifyMemorySSA();
|
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
// If we never found a PHI that needs to be simplified in the next
|
|
|
|
// iteration, we're done.
|
|
|
|
if (Next->empty())
|
|
|
|
break;
|
2018-05-25 09:32:36 +08:00
|
|
|
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
// Otherwise, put the next set in place for the next iteration and reset it
|
|
|
|
// and the visited PHIs for that iteration.
|
|
|
|
std::swap(Next, ToSimplify);
|
|
|
|
Next->clear();
|
|
|
|
VisitedPHIs.clear();
|
|
|
|
DeadInsts.clear();
|
|
|
|
}
|
2018-05-25 09:32:36 +08:00
|
|
|
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
class LoopInstSimplifyLegacyPass : public LoopPass {
|
|
|
|
public:
|
|
|
|
static char ID; // Pass ID, replacement for typeid
|
|
|
|
|
|
|
|
LoopInstSimplifyLegacyPass() : LoopPass(ID) {
|
|
|
|
initializeLoopInstSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
|
|
|
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
|
|
|
|
if (skipLoop(L))
|
|
|
|
return false;
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
|
|
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
|
|
|
AssumptionCache &AC =
|
|
|
|
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
|
2018-05-25 09:32:36 +08:00
|
|
|
*L->getHeader()->getParent());
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
const TargetLibraryInfo &TLI =
|
|
|
|
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
|
2018-08-23 04:05:21 +08:00
|
|
|
MemorySSA *MSSA = nullptr;
|
|
|
|
Optional<MemorySSAUpdater> MSSAU;
|
|
|
|
if (EnableMSSALoopDependency) {
|
|
|
|
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
|
|
|
|
MSSAU = MemorySSAUpdater(MSSA);
|
|
|
|
}
|
2018-05-25 09:32:36 +08:00
|
|
|
|
2018-08-23 04:05:21 +08:00
|
|
|
return simplifyLoopInst(*L, DT, LI, AC, TLI,
|
|
|
|
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
|
2018-05-25 09:32:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
|
AU.addRequired<AssumptionCacheTracker>();
|
[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to
be both simpler and substantially more efficient.
Rather than use a hand-rolled iteration technique that isn't quite the
same as RPO, use the pre-built RPO loop body traversal utility.
Once visiting the loop body in RPO, we can assert that we visit defs
before uses reliably. When this is the case, the only need to iterate is
when simplifying a def that is used by a PHI node along a back-edge.
With this patch, the first pass over the loop body is just a complete
simplification of every instruction across the loop body. When we
encounter a use of a simplified instruction that stems from a PHI node
in the loop body that has already been visited (due to some cyclic CFG,
potentially the loop itself, or a nested loop, or unstructured control
flow), we recall that specific PHI node for the second iteration.
Nothing else needs to be preserved from iteration to iteration.
On the second and later iterations, only instructions known to have
simplified inputs are considered, each time starting from a set of PHIs
that had simplified inputs along the backedges.
Dead instructions are collected along the way, but deleted in a batch at
the end of each iteration making the iterations themselves substantially
simpler. This uses a new batch API for recursively deleting dead
instructions.
This alsa changes the routine to visit subloops. Because simplification
is fundamentally transitive, we may need to visit the entire loop body,
including subloops, to handle knock-on simplification.
I've added a basic test file that helps demonstrate that all of these
changes work. It includes both straight-forward loops with
simplifications as well as interesting PHI-structures, CFG-structures,
and a nested loop case.
Differential Revision: https://reviews.llvm.org/D47407
llvm-svn: 333461
2018-05-30 04:15:38 +08:00
|
|
|
AU.addRequired<DominatorTreeWrapperPass>();
|
2018-05-25 09:32:36 +08:00
|
|
|
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
|
|
|
AU.setPreservesCFG();
|
2018-08-23 04:05:21 +08:00
|
|
|
if (EnableMSSALoopDependency) {
|
|
|
|
AU.addRequired<MemorySSAWrapperPass>();
|
|
|
|
AU.addPreserved<MemorySSAWrapperPass>();
|
|
|
|
}
|
2018-05-25 09:32:36 +08:00
|
|
|
getLoopAnalysisUsage(AU);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
|
|
|
PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
|
|
|
|
LoopStandardAnalysisResults &AR,
|
|
|
|
LPMUpdater &) {
|
2018-08-23 04:05:21 +08:00
|
|
|
Optional<MemorySSAUpdater> MSSAU;
|
|
|
|
if (AR.MSSA) {
|
|
|
|
MSSAU = MemorySSAUpdater(AR.MSSA);
|
|
|
|
AR.MSSA->verifyMemorySSA();
|
|
|
|
}
|
|
|
|
if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI,
|
|
|
|
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
|
2018-05-25 09:32:36 +08:00
|
|
|
return PreservedAnalyses::all();
|
|
|
|
|
|
|
|
auto PA = getLoopPassPreservedAnalyses();
|
|
|
|
PA.preserveSet<CFGAnalyses>();
|
|
|
|
return PA;
|
|
|
|
}
|
|
|
|
|
|
|
|
char LoopInstSimplifyLegacyPass::ID = 0;
|
|
|
|
|
|
|
|
INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify",
|
|
|
|
"Simplify instructions in loops", false, false)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(LoopPass)
|
2018-08-23 04:05:21 +08:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
|
2018-05-25 09:32:36 +08:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
|
|
|
INITIALIZE_PASS_END(LoopInstSimplifyLegacyPass, "loop-instsimplify",
|
|
|
|
"Simplify instructions in loops", false, false)
|
|
|
|
|
|
|
|
Pass *llvm::createLoopInstSimplifyPass() {
|
|
|
|
return new LoopInstSimplifyLegacyPass();
|
|
|
|
}
|