2012-06-08 23:38:21 +08:00
|
|
|
//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-06-08 23:38:21 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This pass identifies loops where we can generate the PPC branch instructions
|
|
|
|
// that decrement and test the count register (CTR) (bdnz and friends).
|
|
|
|
//
|
|
|
|
// The pattern that defines the induction variable can changed depending on
|
|
|
|
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
|
|
|
|
// normalizes induction variables, and the Loop Strength Reduction pass
|
|
|
|
// run by 'llc' may also make changes to the induction variable.
|
|
|
|
//
|
|
|
|
// Criteria for CTR loops:
|
|
|
|
// - Countable loops (w/ ind. var for a trip count)
|
|
|
|
// - Try inner-most loops first
|
|
|
|
// - No nested CTR loops.
|
|
|
|
// - No function calls in loops.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-01-07 19:48:04 +08:00
|
|
|
#include "PPC.h"
|
2017-06-30 07:28:45 +08:00
|
|
|
#include "PPCSubtarget.h"
|
2014-01-07 19:48:04 +08:00
|
|
|
#include "PPCTargetMachine.h"
|
2017-10-13 00:43:33 +08:00
|
|
|
#include "PPCTargetTransformInfo.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2014-01-07 19:48:04 +08:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2017-10-13 00:43:33 +08:00
|
|
|
#include "llvm/Analysis/AssumptionCache.h"
|
2018-05-03 06:56:04 +08:00
|
|
|
#include "llvm/Analysis/CFG.h"
|
2017-10-13 00:43:33 +08:00
|
|
|
#include "llvm/Analysis/CodeMetrics.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2018-05-03 06:56:04 +08:00
|
|
|
#include "llvm/Analysis/LoopIterator.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
2015-01-15 10:16:27 +08:00
|
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
2017-10-13 00:43:33 +08:00
|
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
2018-06-05 05:23:21 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2017-06-30 07:28:45 +08:00
|
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
2017-10-13 00:43:33 +08:00
|
|
|
#include "llvm/CodeGen/TargetSchedule.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/Constants.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
2014-01-13 17:26:24 +08:00
|
|
|
#include "llvm/IR/Dominators.h"
|
2013-05-18 17:20:39 +08:00
|
|
|
#include "llvm/IR/InlineAsm.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#include "llvm/IR/Instructions.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
2014-03-04 19:17:44 +08:00
|
|
|
#include "llvm/IR/ValueHandle.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/PassSupport.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2012-06-08 23:38:21 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/Transforms/Scalar.h"
|
2018-03-29 01:44:36 +08:00
|
|
|
#include "llvm/Transforms/Utils.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
2013-05-21 04:46:30 +08:00
|
|
|
#include "llvm/Transforms/Utils/LoopUtils.h"
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
#include "llvm/CodeGen/MachineDominators.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
#endif
|
|
|
|
|
2012-06-08 23:38:21 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2014-04-22 10:41:26 +08:00
|
|
|
#define DEBUG_TYPE "ctrloops"
|
|
|
|
|
Implement PPC counter loops as a late IR-level pass
The old PPCCTRLoops pass, like the Hexagon pass version from which it was
derived, could only handle some simple loops in canonical form. We cannot
directly adapt the new Hexagon hardware loops pass, however, because the
Hexagon pass contains a fundamental assumption that non-constant-trip-count
loops will contain a guard, and this is not always true (the result being that
incorrect negative counts can be generated). With this commit, we replace the
pass with a late IR-level pass which makes use of SE to calculate the
backedge-taken counts and safely generate the loop-count expressions (including
any necessary max() parts). This IR level pass inserts custom intrinsics that
are lowered into the desired decrement-and-branch instructions.
The most fragile part of this new implementation is that interfering uses of
the counter register must be detected on the IR level (and, on PPC, this also
includes any indirect branches in addition to function calls). Also, to make
all of this work, we need a variant of the mtctr instruction that is marked
as having side effects. Without this, machine-code level CSE, DCE, etc.
illegally transform the resulting code. Hopefully, this can be improved
in the future.
This new pass is smaller than the original (and much smaller than the new
Hexagon hardware loops pass), and can handle many additional cases correctly.
In addition, the preheader-creation code has been copied from LoopSimplify, and
after we decide on where it belongs, this code will be refactored so that it
can be explicitly shared (making this implementation even smaller).
The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for
the new Hexagon pass. There are a few classes of loops that this pass does not
transform (noted by FIXMEs in the files), but these deficiencies can be
addressed within the SE infrastructure (thus helping many other passes as well).
llvm-svn: 181927
2013-05-16 05:37:41 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
|
|
|
|
#endif
|
|
|
|
|
2012-06-08 23:38:21 +08:00
|
|
|
namespace {
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
struct PPCCTRLoopsVerify : public MachineFunctionPass {
|
|
|
|
public:
|
|
|
|
static char ID;
|
|
|
|
|
|
|
|
PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
|
|
|
|
initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
2014-04-29 15:57:37 +08:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
AU.addRequired<MachineDominatorTree>();
|
|
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
|
|
}
|
|
|
|
|
2014-04-29 15:57:37 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
MachineDominatorTree *MDT;
|
|
|
|
};
|
|
|
|
|
|
|
|
char PPCCTRLoopsVerify::ID = 0;
|
|
|
|
#endif // NDEBUG
|
2012-06-08 23:38:21 +08:00
|
|
|
} // end anonymous namespace
|
|
|
|
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
#ifndef NDEBUG
|
|
|
|
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
|
|
|
|
"PowerPC CTR Loops Verify", false, false)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
|
|
|
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
|
|
|
|
"PowerPC CTR Loops Verify", false, false)
|
|
|
|
|
|
|
|
FunctionPass *llvm::createPPCCTRLoopsVerify() {
|
|
|
|
return new PPCCTRLoopsVerify();
|
|
|
|
}
|
|
|
|
#endif // NDEBUG
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
2016-07-27 21:24:16 +08:00
|
|
|
static bool clobbersCTR(const MachineInstr &MI) {
|
|
|
|
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
|
|
|
const MachineOperand &MO = MI.getOperand(i);
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
if (MO.isReg()) {
|
|
|
|
if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
|
|
|
|
return true;
|
|
|
|
} else if (MO.isRegMask()) {
|
|
|
|
if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool verifyCTRBranch(MachineBasicBlock *MBB,
|
|
|
|
MachineBasicBlock::iterator I) {
|
|
|
|
MachineBasicBlock::iterator BI = I;
|
|
|
|
SmallSet<MachineBasicBlock *, 16> Visited;
|
|
|
|
SmallVector<MachineBasicBlock *, 8> Preds;
|
|
|
|
bool CheckPreds;
|
|
|
|
|
|
|
|
if (I == MBB->begin()) {
|
|
|
|
Visited.insert(MBB);
|
|
|
|
goto queue_preds;
|
|
|
|
} else
|
|
|
|
--I;
|
|
|
|
|
|
|
|
check_block:
|
|
|
|
Visited.insert(MBB);
|
|
|
|
if (I == MBB->end())
|
|
|
|
goto queue_preds;
|
|
|
|
|
|
|
|
CheckPreds = true;
|
|
|
|
for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
|
|
|
|
unsigned Opc = I->getOpcode();
|
2013-05-21 00:08:37 +08:00
|
|
|
if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
CheckPreds = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-07-27 21:24:16 +08:00
|
|
|
if (I != BI && clobbersCTR(*I)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
|
|
|
|
<< ") instruction " << *I
|
|
|
|
<< " clobbers CTR, invalidating "
|
|
|
|
<< printMBBReference(*BI->getParent()) << " ("
|
|
|
|
<< BI->getParent()->getFullName() << ") instruction "
|
|
|
|
<< *BI << "\n");
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (I == IE)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!CheckPreds && Preds.empty())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (CheckPreds) {
|
|
|
|
queue_preds:
|
|
|
|
if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
|
|
|
|
<< printMBBReference(*BI->getParent()) << " ("
|
|
|
|
<< BI->getParent()->getFullName() << ") instruction "
|
|
|
|
<< *BI << "\n");
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
|
|
|
|
PIE = MBB->pred_end(); PI != PIE; ++PI)
|
|
|
|
Preds.push_back(*PI);
|
|
|
|
}
|
|
|
|
|
|
|
|
do {
|
|
|
|
MBB = Preds.pop_back_val();
|
|
|
|
if (!Visited.count(MBB)) {
|
|
|
|
I = MBB->getLastNonDebugInstr();
|
|
|
|
goto check_block;
|
|
|
|
}
|
|
|
|
} while (!Preds.empty());
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
|
|
|
|
MDT = &getAnalysis<MachineDominatorTree>();
|
|
|
|
|
|
|
|
// Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
|
|
|
|
// any other instructions that might clobber the ctr register.
|
|
|
|
for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
|
|
|
|
I != IE; ++I) {
|
2015-10-20 09:07:37 +08:00
|
|
|
MachineBasicBlock *MBB = &*I;
|
Add a PPCCTRLoops verification pass
When asserts are enabled, this adds a verification pass for PPC counter-loop
formation. Unfortunately, without sacrificing code quality, there is no better
way of forming counter-based loops except at the (late) IR level. This means
that we need to recognize, at the IR level, anything which might turn into a
function call (or indirect branch). Because this is currently a finite set of
things, and because SelectionDAG lowering is basic-block local, this can be
done. Nevertheless, it is fragile, and failure results in a miscompile. This
verification pass checks that all (reachable) counter-based branches are
dominated by a loop mtctr instruction, and that no instructions in between
clobber the counter register. If these conditions are not satisfied, then an
ICE will be triggered.
In short, this is to help us sleep better at night.
llvm-svn: 182295
2013-05-21 00:08:17 +08:00
|
|
|
if (!MDT->isReachableFromEntry(MBB))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
|
|
|
|
MIIE = MBB->end(); MII != MIIE; ++MII) {
|
|
|
|
unsigned Opc = MII->getOpcode();
|
|
|
|
if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
|
|
|
|
Opc == PPC::BDZ8 || Opc == PPC::BDZ)
|
|
|
|
if (!verifyCTRBranch(MBB, MII))
|
|
|
|
llvm_unreachable("Invalid PPC CTR loop!");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
#endif // NDEBUG
|