2002-05-22 04:50:24 +08:00
|
|
|
//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2003-10-21 03:43:21 +08:00
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-30 04:36:04 +08:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2003-10-21 03:43:21 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2002-05-22 04:50:24 +08:00
|
|
|
//
|
2002-10-09 05:36:33 +08:00
|
|
|
// Peephole optimize the CFG.
|
2002-05-22 04:50:24 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
|
|
#include "llvm/ADT/STLExtras.h"
|
|
|
|
#include "llvm/ADT/SetVector.h"
|
|
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
2013-11-12 20:24:36 +08:00
|
|
|
#include "llvm/Analysis/ConstantFolding.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Analysis/InstructionSimplify.h"
|
2013-01-07 11:08:10 +08:00
|
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Analysis/ValueTracking.h"
|
2014-03-04 19:45:46 +08:00
|
|
|
#include "llvm/IR/CFG.h"
|
2014-03-04 20:24:34 +08:00
|
|
|
#include "llvm/IR/ConstantRange.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/GlobalVariable.h"
|
|
|
|
#include "llvm/IR/IRBuilder.h"
|
|
|
|
#include "llvm/IR/Instructions.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
|
|
#include "llvm/IR/LLVMContext.h"
|
|
|
|
#include "llvm/IR/MDBuilder.h"
|
|
|
|
#include "llvm/IR/Metadata.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
2014-03-04 20:05:47 +08:00
|
|
|
#include "llvm/IR/NoFolder.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/Operator.h"
|
2014-03-04 19:08:18 +08:00
|
|
|
#include "llvm/IR/PatternMatch.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/Type.h"
|
2011-01-29 12:46:23 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2010-12-14 14:17:25 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2012-06-29 20:38:19 +08:00
|
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
2014-08-15 23:46:38 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
#include "llvm/Transforms/Utils/ValueMapper.h"
|
2002-05-22 04:50:24 +08:00
|
|
|
#include <algorithm>
|
2004-10-18 12:07:22 +08:00
|
|
|
#include <map>
|
2012-12-04 00:50:05 +08:00
|
|
|
#include <set>
|
2004-01-09 14:12:26 +08:00
|
|
|
using namespace llvm;
|
2013-07-04 22:22:02 +08:00
|
|
|
using namespace PatternMatch;
|
2003-11-12 06:41:34 +08:00
|
|
|
|
2014-04-22 10:55:47 +08:00
|
|
|
#define DEBUG_TYPE "simplifycfg"
|
|
|
|
|
2015-02-13 18:48:30 +08:00
|
|
|
// Chosen as 2 so as to be cheap, but still to have enough power to fold
|
|
|
|
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
|
|
|
|
// To catch this, we need to fold a compare and a select, hence '2' being the
|
|
|
|
// minimum reasonable default.
|
2011-04-30 02:47:38 +08:00
|
|
|
static cl::opt<unsigned>
|
2015-02-13 18:48:30 +08:00
|
|
|
PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2),
|
|
|
|
cl::desc("Control the amount of phi node folding to perform (default = 2)"));
|
2011-04-30 02:47:38 +08:00
|
|
|
|
2011-01-29 12:46:23 +08:00
|
|
|
static cl::opt<bool>
|
|
|
|
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
|
|
|
|
cl::desc("Duplicate return instructions into unconditional branches"));
|
|
|
|
|
2012-09-21 06:37:36 +08:00
|
|
|
static cl::opt<bool>
|
|
|
|
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
|
|
|
|
cl::desc("Sink common instructions down to the end block"));
|
|
|
|
|
2014-01-25 01:20:08 +08:00
|
|
|
static cl::opt<bool> HoistCondStores(
|
|
|
|
"simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
|
|
|
|
cl::desc("Hoist conditional stores if an unconditional store precedes"));
|
2013-04-30 05:28:24 +08:00
|
|
|
|
2012-09-26 17:44:49 +08:00
|
|
|
// Counters reported under DEBUG_TYPE. The first five concern switch lowering,
// the last two concern sinking and speculation.
STATISTIC(NumBitMaps,
          "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
          "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables,
          "Number of switch instructions turned into lookup tables");
STATISTIC(NumLookupTablesHoles,
          "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses,
          "Number of reused switch table lookup compares");
STATISTIC(NumSinkCommons,
          "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations,
          "Number of speculative executed instructions");
|
2008-06-13 05:15:59 +08:00
|
|
|
|
2010-02-06 06:03:18 +08:00
|
|
|
namespace {
|
2014-10-14 09:58:26 +08:00
|
|
|
// The first field contains the value that the switch produces when a certain
|
|
|
|
// case group is selected, and the second field is a vector containing the cases
|
|
|
|
// composing the case group.
|
|
|
|
typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
|
|
|
|
SwitchCaseResultVectorTy;
|
|
|
|
// The first field contains the phi node that generates a result of the switch
|
|
|
|
// and the second field contains the value generated for a certain case in the switch
|
|
|
|
// for that PHI.
|
|
|
|
typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
|
|
|
|
|
2012-07-03 07:22:21 +08:00
|
|
|
/// ValueEqualityComparisonCase - Represents a case of a switch.
|
|
|
|
struct ValueEqualityComparisonCase {
|
|
|
|
ConstantInt *Value;
|
|
|
|
BasicBlock *Dest;
|
|
|
|
|
|
|
|
ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
|
|
|
|
: Value(Value), Dest(Dest) {}
|
|
|
|
|
|
|
|
bool operator<(ValueEqualityComparisonCase RHS) const {
|
|
|
|
// Comparing pointers is ok as we only rely on the order for uniquing.
|
|
|
|
return Value < RHS.Value;
|
|
|
|
}
|
2012-10-14 19:15:42 +08:00
|
|
|
|
|
|
|
bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
|
2012-07-03 07:22:21 +08:00
|
|
|
};
|
|
|
|
|
2010-02-06 06:03:18 +08:00
|
|
|
class SimplifyCFGOpt {
|
2013-01-07 11:53:25 +08:00
|
|
|
const TargetTransformInfo &TTI;
|
2015-03-10 10:37:25 +08:00
|
|
|
const DataLayout &DL;
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
unsigned BonusInstThreshold;
|
2015-01-04 20:03:27 +08:00
|
|
|
AssumptionCache *AC;
|
2010-02-06 06:03:18 +08:00
|
|
|
Value *isValueEqualityComparison(TerminatorInst *TI);
|
|
|
|
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
|
2012-07-03 07:22:21 +08:00
|
|
|
std::vector<ValueEqualityComparisonCase> &Cases);
|
2010-02-06 06:03:18 +08:00
|
|
|
bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
|
2011-05-19 04:35:38 +08:00
|
|
|
BasicBlock *Pred,
|
|
|
|
IRBuilder<> &Builder);
|
2011-05-19 04:53:17 +08:00
|
|
|
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
|
|
|
|
IRBuilder<> &Builder);
|
2010-02-06 06:03:18 +08:00
|
|
|
|
2011-05-19 05:33:11 +08:00
|
|
|
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
|
2012-02-07 05:16:41 +08:00
|
|
|
bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
|
2010-12-13 14:25:44 +08:00
|
|
|
bool SimplifyUnreachable(UnreachableInst *UI);
|
2011-05-19 04:35:38 +08:00
|
|
|
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
|
2010-12-13 14:25:44 +08:00
|
|
|
bool SimplifyIndirectBr(IndirectBrInst *IBI);
|
2011-05-19 02:28:48 +08:00
|
|
|
bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
|
2011-05-19 04:35:38 +08:00
|
|
|
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
|
2010-12-13 14:25:44 +08:00
|
|
|
|
2010-02-06 06:03:18 +08:00
|
|
|
public:
|
2015-03-10 10:37:25 +08:00
|
|
|
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
|
|
|
|
unsigned BonusInstThreshold, AssumptionCache *AC)
|
|
|
|
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
|
2010-02-06 06:03:18 +08:00
|
|
|
bool run(BasicBlock *BB);
|
|
|
|
};
|
2015-06-23 17:49:53 +08:00
|
|
|
}
|
2010-02-06 06:03:18 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if it is safe to merge these two
|
2005-08-03 08:19:45 +08:00
|
|
|
/// terminator instructions together.
|
|
|
|
static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
|
|
|
|
if (SI1 == SI2) return false; // Can't merge with self!
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-08-03 08:19:45 +08:00
|
|
|
// It is not safe to merge these two switch instructions if they have a common
|
|
|
|
// successor, and if that successor has a PHI node, and if *that* PHI node has
|
|
|
|
// conflicting incoming values from the two switch blocks.
|
|
|
|
BasicBlock *SI1BB = SI1->getParent();
|
|
|
|
BasicBlock *SI2BB = SI2->getParent();
|
2007-04-02 09:44:59 +08:00
|
|
|
SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-07-22 01:06:51 +08:00
|
|
|
for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
|
|
|
|
if (SI1Succs.count(*I))
|
|
|
|
for (BasicBlock::iterator BBI = (*I)->begin();
|
2005-08-03 08:19:45 +08:00
|
|
|
isa<PHINode>(BBI); ++BBI) {
|
|
|
|
PHINode *PN = cast<PHINode>(BBI);
|
|
|
|
if (PN->getIncomingValueForBlock(SI1BB) !=
|
|
|
|
PN->getIncomingValueForBlock(SI2BB))
|
|
|
|
return false;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-08-03 08:19:45 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if it is safe and profitable to merge these two terminator
|
|
|
|
/// instructions together, where SI1 is an unconditional branch. PhiNodes will
|
|
|
|
/// store all PHI nodes in common successors.
|
2012-06-13 13:43:29 +08:00
|
|
|
static bool isProfitableToFoldUnconditional(BranchInst *SI1,
|
|
|
|
BranchInst *SI2,
|
2012-06-24 18:15:42 +08:00
|
|
|
Instruction *Cond,
|
2012-06-13 13:43:29 +08:00
|
|
|
SmallVectorImpl<PHINode*> &PhiNodes) {
|
|
|
|
if (SI1 == SI2) return false; // Can't merge with self!
|
|
|
|
assert(SI1->isUnconditional() && SI2->isConditional());
|
|
|
|
|
|
|
|
// We fold the unconditional branch if we can easily update all PHI nodes in
|
2012-08-30 05:46:36 +08:00
|
|
|
// common successors:
|
2012-06-13 13:43:29 +08:00
|
|
|
// 1> We have a constant incoming value for the conditional branch;
|
|
|
|
// 2> We have "Cond" as the incoming value for the unconditional branch;
|
|
|
|
// 3> SI2->getCondition() and Cond have same operands.
|
|
|
|
CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
|
|
|
|
if (!Ci2) return false;
|
|
|
|
if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
|
|
|
|
Cond->getOperand(1) == Ci2->getOperand(1)) &&
|
|
|
|
!(Cond->getOperand(0) == Ci2->getOperand(1) &&
|
|
|
|
Cond->getOperand(1) == Ci2->getOperand(0)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
BasicBlock *SI1BB = SI1->getParent();
|
|
|
|
BasicBlock *SI2BB = SI2->getParent();
|
|
|
|
SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
|
2014-07-22 01:06:51 +08:00
|
|
|
for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
|
|
|
|
if (SI1Succs.count(*I))
|
|
|
|
for (BasicBlock::iterator BBI = (*I)->begin();
|
2012-06-13 13:43:29 +08:00
|
|
|
isa<PHINode>(BBI); ++BBI) {
|
|
|
|
PHINode *PN = cast<PHINode>(BBI);
|
|
|
|
if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
|
2012-06-24 18:15:42 +08:00
|
|
|
!isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
|
2012-06-13 13:43:29 +08:00
|
|
|
return false;
|
|
|
|
PhiNodes.push_back(PN);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Update PHI nodes in Succ to indicate that there will now be entries in it
|
|
|
|
/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
|
|
|
|
/// will be the same as those coming in from ExistPred, an existing predecessor
|
|
|
|
/// of Succ.
|
2005-08-03 08:19:45 +08:00
|
|
|
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
|
|
|
|
BasicBlock *ExistPred) {
|
|
|
|
if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:23:11 +08:00
|
|
|
PHINode *PN;
|
|
|
|
for (BasicBlock::iterator I = Succ->begin();
|
|
|
|
(PN = dyn_cast<PHINode>(I)); ++I)
|
|
|
|
PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
|
2005-08-03 08:19:45 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Compute an abstract "cost" of speculating the given instruction,
|
|
|
|
/// which is assumed to be safe to speculate. TCC_Free means cheap,
|
|
|
|
/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
|
2015-02-11 20:15:41 +08:00
|
|
|
/// expensive.
|
2015-03-10 10:37:25 +08:00
|
|
|
static unsigned ComputeSpeculationCost(const User *I,
|
2015-02-11 20:15:41 +08:00
|
|
|
const TargetTransformInfo &TTI) {
|
2015-03-10 10:37:25 +08:00
|
|
|
assert(isSafeToSpeculativelyExecute(I) &&
|
2012-01-06 07:58:56 +08:00
|
|
|
"Instruction is not safe to speculatively execute!");
|
2015-02-11 20:15:41 +08:00
|
|
|
return TTI.getUserCost(I);
|
2012-01-06 07:58:56 +08:00
|
|
|
}
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If we have a merge point of an "if condition" as accepted above,
|
|
|
|
/// return true if the specified value dominates the block. We
|
2009-01-20 07:43:56 +08:00
|
|
|
/// don't handle the true generality of domination here, just a special case
|
|
|
|
/// which works well enough for us.
|
|
|
|
///
|
|
|
|
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
|
2011-04-30 02:47:31 +08:00
|
|
|
/// see if V (which must be an instruction) and its recursive operands
|
|
|
|
/// that do not dominate BB have a combined cost lower than CostRemaining and
|
|
|
|
/// are non-trapping. If both are true, the instruction is inserted into the
|
|
|
|
/// set and true is returned.
|
|
|
|
///
|
|
|
|
/// The cost for most non-trapping instructions is defined as 1 except for
|
|
|
|
/// Select whose cost is 2.
|
|
|
|
///
|
|
|
|
/// After this function returns, CostRemaining is decreased by the cost of
|
|
|
|
/// V plus its non-dominating operands. If that cost is greater than
|
|
|
|
/// CostRemaining, false is returned and CostRemaining is undefined.
|
2004-10-14 13:13:36 +08:00
|
|
|
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
|
2014-08-21 13:55:13 +08:00
|
|
|
SmallPtrSetImpl<Instruction*> *AggressiveInsts,
|
2014-07-10 22:41:31 +08:00
|
|
|
unsigned &CostRemaining,
|
2015-02-11 20:15:41 +08:00
|
|
|
const TargetTransformInfo &TTI) {
|
2004-04-10 06:50:22 +08:00
|
|
|
Instruction *I = dyn_cast<Instruction>(V);
|
2006-10-20 08:42:07 +08:00
|
|
|
if (!I) {
|
|
|
|
// Non-instructions all dominate instructions, but not all constantexprs
|
|
|
|
// can be executed unconditionally.
|
|
|
|
if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
|
|
|
|
if (C->canTrap())
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
2004-04-10 06:50:22 +08:00
|
|
|
BasicBlock *PBB = I->getParent();
|
|
|
|
|
2005-02-27 14:18:25 +08:00
|
|
|
// We don't want to allow weird loops that might have the "if condition" in
|
2004-04-10 06:50:22 +08:00
|
|
|
// the bottom of this block.
|
|
|
|
if (PBB == BB) return false;
|
|
|
|
|
|
|
|
// If this instruction is defined in a block that contains an unconditional
|
|
|
|
// branch to BB, then it must be in the 'conditional' part of the "if
|
2010-12-14 15:41:39 +08:00
|
|
|
// statement". If not, it definitely dominates the region.
|
|
|
|
BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
|
2010-12-14 15:41:39 +08:00
|
|
|
return true;
|
2009-07-17 12:28:42 +08:00
|
|
|
|
2010-12-14 15:41:39 +08:00
|
|
|
// If we aren't allowing aggressive promotion anymore, then don't consider
|
|
|
|
// instructions in the 'if region'.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!AggressiveInsts) return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2011-04-30 02:47:31 +08:00
|
|
|
// If we have seen this instruction before, don't count it again.
|
|
|
|
if (AggressiveInsts->count(I)) return true;
|
|
|
|
|
2010-12-14 15:41:39 +08:00
|
|
|
// Okay, it looks like the instruction IS in the "condition". Check to
|
|
|
|
// see if it's a cheap instruction to unconditionally compute, and if it
|
|
|
|
// only uses stuff defined outside of the condition. If so, hoist it out.
|
2015-03-10 10:37:25 +08:00
|
|
|
if (!isSafeToSpeculativelyExecute(I))
|
2010-12-14 15:41:39 +08:00
|
|
|
return false;
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2015-03-10 10:37:25 +08:00
|
|
|
unsigned Cost = ComputeSpeculationCost(I, TTI);
|
2004-02-11 11:36:04 +08:00
|
|
|
|
2011-04-30 02:47:31 +08:00
|
|
|
if (Cost > CostRemaining)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CostRemaining -= Cost;
|
|
|
|
|
|
|
|
// Okay, we can only really hoist these out if their operands do
|
|
|
|
// not take us over the cost threshold.
|
2010-12-14 15:41:39 +08:00
|
|
|
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
|
2015-03-10 10:37:25 +08:00
|
|
|
if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
|
2010-12-14 15:41:39 +08:00
|
|
|
return false;
|
|
|
|
// Okay, it's safe to do this! Remember this instruction.
|
|
|
|
AggressiveInsts->insert(I);
|
2004-02-11 11:36:04 +08:00
|
|
|
return true;
|
|
|
|
}
|
2002-05-22 04:50:24 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Extract ConstantInt from value, looking through IntToPtr
|
2010-02-06 06:03:18 +08:00
|
|
|
/// and PointerNullValue. Return NULL if value is not a constant int.
|
2015-03-10 10:37:25 +08:00
|
|
|
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
|
2010-02-06 06:03:18 +08:00
|
|
|
// Normal constant int.
|
|
|
|
ConstantInt *CI = dyn_cast<ConstantInt>(V);
|
2015-03-10 10:37:25 +08:00
|
|
|
if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
|
2010-02-06 06:03:18 +08:00
|
|
|
return CI;
|
|
|
|
|
|
|
|
// This is some kind of pointer constant. Turn it into a pointer-sized
|
|
|
|
// ConstantInt if possible.
|
2015-03-10 10:37:25 +08:00
|
|
|
IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
|
2010-02-06 06:03:18 +08:00
|
|
|
|
|
|
|
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
|
|
|
|
if (isa<ConstantPointerNull>(V))
|
|
|
|
return ConstantInt::get(PtrTy, 0);
|
|
|
|
|
|
|
|
// IntToPtr const int.
|
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
|
|
|
|
if (CE->getOpcode() == Instruction::IntToPtr)
|
|
|
|
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
|
|
|
|
// The constant is very likely to have the right type already.
|
|
|
|
if (CI->getType() == PtrTy)
|
|
|
|
return CI;
|
|
|
|
else
|
|
|
|
return cast<ConstantInt>
|
|
|
|
(ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
|
|
|
|
}
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-02-06 06:03:18 +08:00
|
|
|
}
|
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
namespace {
|
2014-11-20 20:36:43 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
/// Given a chain of 'or' (||) or 'and' (&&) comparisons of a value against
/// constants, this tries to recover the information required to build a
/// switch.
/// It traverses the chain of comparisons depth-first, looking for patterns
/// like %a == 12 or %a < 4, and combines them to produce the set of integers
/// representing the different cases of the switch.
/// Note that if the chain is composed of '||', it builds the set of values
/// that match the comparisons (i.e. any of these values satisfies the chain),
/// while for a chain of '&&' it builds the set of values that make the test
/// fail.
|
|
|
|
struct ConstantComparesGatherer {
|
2015-03-10 10:37:25 +08:00
|
|
|
const DataLayout &DL;
|
2014-11-21 06:40:25 +08:00
|
|
|
Value *CompValue; /// Value found for the switch comparison
|
|
|
|
Value *Extra; /// Extra clause to be checked before the switch
|
|
|
|
SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch
|
|
|
|
unsigned UsedICmps; /// Number of comparisons matched in the and/or chain
|
|
|
|
|
|
|
|
/// Construct and compute the result for the comparison instruction Cond
|
2015-03-10 10:37:25 +08:00
|
|
|
ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL)
|
|
|
|
: DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
|
|
|
|
gather(Cond);
|
2014-11-20 04:09:11 +08:00
|
|
|
}
|
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
/// Prevent copy
|
2015-02-16 06:54:22 +08:00
|
|
|
ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
|
2014-11-21 06:40:25 +08:00
|
|
|
ConstantComparesGatherer &
|
2015-02-16 06:54:22 +08:00
|
|
|
operator=(const ConstantComparesGatherer &) = delete;
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
private:
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
/// Try to set the current value used for the comparison, it succeeds only if
|
|
|
|
/// it wasn't set before or if the new value is the same as the old one
|
|
|
|
bool setValueOnce(Value *NewVal) {
|
|
|
|
if(CompValue && CompValue != NewVal) return false;
|
|
|
|
CompValue = NewVal;
|
|
|
|
return (CompValue != nullptr);
|
2014-11-20 04:09:11 +08:00
|
|
|
}
|
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
/// Try to match Instruction "I" as a comparison against a constant and
|
|
|
|
/// populates the array Vals with the set of values that match (or do not
|
|
|
|
/// match depending on isEQ).
|
|
|
|
/// Return false on failure. On success, the Value the comparison matched
|
|
|
|
/// against is placed in CompValue.
|
|
|
|
/// If CompValue is already set, the function is expected to fail if a match
|
|
|
|
/// is found but the value compared to is different.
|
2015-03-10 10:37:25 +08:00
|
|
|
bool matchInstruction(Instruction *I, bool isEQ) {
|
2014-11-21 06:40:25 +08:00
|
|
|
// If this is an icmp against a constant, handle this as one of the cases.
|
|
|
|
ICmpInst *ICI;
|
|
|
|
ConstantInt *C;
|
|
|
|
if (!((ICI = dyn_cast<ICmpInst>(I)) &&
|
|
|
|
(C = GetConstantInt(I->getOperand(1), DL)))) {
|
|
|
|
return false;
|
|
|
|
}
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
Value *RHSVal;
|
|
|
|
ConstantInt *RHSC;
|
|
|
|
|
|
|
|
// Pattern match a special case
|
|
|
|
// (x & ~2^x) == y --> x == y || x == y|2^x
|
|
|
|
// This undoes a transformation done by instcombine to fuse 2 compares.
|
|
|
|
if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
|
|
|
|
if (match(ICI->getOperand(0),
|
|
|
|
m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
|
|
|
|
APInt Not = ~RHSC->getValue();
|
|
|
|
if (Not.isPowerOf2()) {
|
|
|
|
// If we already have a value for the switch, it has to match!
|
|
|
|
if(!setValueOnce(RHSVal))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Vals.push_back(C);
|
|
|
|
Vals.push_back(ConstantInt::get(C->getContext(),
|
|
|
|
C->getValue() | Not));
|
|
|
|
UsedICmps++;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// If we already have a value for the switch, it has to match!
|
|
|
|
if(!setValueOnce(ICI->getOperand(0)))
|
|
|
|
return false;
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
UsedICmps++;
|
|
|
|
Vals.push_back(C);
|
|
|
|
return ICI->getOperand(0);
|
|
|
|
}
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// If we have "x ult 3", for example, then we can add 0,1,2 to the set.
|
2015-03-18 08:41:24 +08:00
|
|
|
ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
|
|
|
|
ICI->getPredicate(), C->getValue());
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// Shift the range if the compare is fed by an add. This is the range
|
|
|
|
// compare idiom as emitted by instcombine.
|
|
|
|
Value *CandidateVal = I->getOperand(0);
|
|
|
|
if(match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
|
|
|
|
Span = Span.subtract(RHSC->getValue());
|
|
|
|
CandidateVal = RHSVal;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// If this is an and/!= check, then we are looking to build the set of
|
|
|
|
// value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
|
|
|
|
// x != 0 && x != 1.
|
|
|
|
if (!isEQ)
|
|
|
|
Span = Span.inverse();
|
2013-07-04 22:22:02 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// If there are a ton of values, we don't want to make a ginormous switch.
|
|
|
|
if (Span.getSetSize().ugt(8) || Span.isEmptySet()) {
|
|
|
|
return false;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// If we already have a value for the switch, it has to match!
|
|
|
|
if(!setValueOnce(CandidateVal))
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// Add all values from the range to the set
|
|
|
|
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
|
|
|
|
Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
|
enhance the "change or icmp's into switch" xform to handle one value in an
'or sequence' that it doesn't understand. This allows us to optimize
something insane like this:
int crud (unsigned char c, unsigned x)
{
if(((((((((( (int) c <= 32 ||
(int) c == 46) || (int) c == 44)
|| (int) c == 58) || (int) c == 59) || (int) c == 60)
|| (int) c == 62) || (int) c == 34) || (int) c == 92)
|| (int) c == 39) != 0)
foo();
}
into:
define i32 @crud(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
entry:
%cmp = icmp ult i8 %c, 33
br i1 %cmp, label %if.then, label %switch.early.test
switch.early.test: ; preds = %entry
switch i8 %c, label %if.end [
i8 39, label %if.then
i8 44, label %if.then
i8 58, label %if.then
i8 59, label %if.then
i8 60, label %if.then
i8 62, label %if.then
i8 46, label %if.then
i8 92, label %if.then
i8 34, label %if.then
]
by pulling the < comparison out ahead of the newly formed switch.
llvm-svn: 121680
2010-12-13 12:50:38 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
UsedICmps++;
|
|
|
|
return true;
|
2014-11-20 04:09:11 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
}
|
2014-11-20 20:36:43 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Given a potentially 'or'd or 'and'd together collection of icmp
|
2014-11-21 06:40:25 +08:00
|
|
|
/// eq/ne/lt/gt instructions that compare a value against a constant, extract
|
|
|
|
/// the value being compared, and stick the list constants into the Vals
|
|
|
|
/// vector.
|
|
|
|
/// One "Extra" case is allowed to differ from the other.
|
2015-03-10 10:37:25 +08:00
|
|
|
void gather(Value *V) {
|
2014-11-21 06:40:25 +08:00
|
|
|
Instruction *I = dyn_cast<Instruction>(V);
|
|
|
|
bool isEQ = (I->getOpcode() == Instruction::Or);
|
|
|
|
|
|
|
|
// Keep a stack (SmallVector for efficiency) for depth-first traversal
|
|
|
|
SmallVector<Value *, 8> DFT;
|
|
|
|
|
|
|
|
// Initialize
|
|
|
|
DFT.push_back(V);
|
|
|
|
|
|
|
|
while(!DFT.empty()) {
|
|
|
|
V = DFT.pop_back_val();
|
|
|
|
|
|
|
|
if (Instruction *I = dyn_cast<Instruction>(V)) {
|
|
|
|
// If it is a || (or && depending on isEQ), process the operands.
|
|
|
|
if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
|
|
|
|
DFT.push_back(I->getOperand(1));
|
|
|
|
DFT.push_back(I->getOperand(0));
|
|
|
|
continue;
|
|
|
|
}
|
2014-11-20 20:36:43 +08:00
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// Try to match the current instruction
|
2015-03-10 10:37:25 +08:00
|
|
|
if (matchInstruction(I, isEQ))
|
2014-11-21 06:40:25 +08:00
|
|
|
// Match succeed, continue the loop
|
|
|
|
continue;
|
2014-11-20 04:09:11 +08:00
|
|
|
}
|
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
// One element of the sequence of || (or &&) could not be match as a
|
|
|
|
// comparison against the same value as the others.
|
|
|
|
// We allow only one "Extra" case to be checked before the switch
|
|
|
|
if (!Extra) {
|
|
|
|
Extra = V;
|
2014-11-20 04:09:11 +08:00
|
|
|
continue;
|
|
|
|
}
|
2014-11-21 06:40:25 +08:00
|
|
|
// Failed to parse a proper sequence, abort now
|
|
|
|
CompValue = nullptr;
|
|
|
|
break;
|
enhance the "change or icmp's into switch" xform to handle one value in an
'or sequence' that it doesn't understand. This allows us to optimize
something insane like this:
int crud (unsigned char c, unsigned x)
{
if(((((((((( (int) c <= 32 ||
(int) c == 46) || (int) c == 44)
|| (int) c == 58) || (int) c == 59) || (int) c == 60)
|| (int) c == 62) || (int) c == 34) || (int) c == 92)
|| (int) c == 39) != 0)
foo();
}
into:
define i32 @crud(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
entry:
%cmp = icmp ult i8 %c, 33
br i1 %cmp, label %if.then, label %switch.early.test
switch.early.test: ; preds = %entry
switch i8 %c, label %if.end [
i8 39, label %if.then
i8 44, label %if.then
i8 58, label %if.then
i8 59, label %if.then
i8 60, label %if.then
i8 62, label %if.then
i8 46, label %if.then
i8 92, label %if.then
i8 34, label %if.then
]
by pulling the < comparison out ahead of the newly formed switch.
llvm-svn: 121680
2010-12-13 12:50:38 +08:00
|
|
|
}
|
2008-02-20 19:26:25 +08:00
|
|
|
}
|
2014-11-21 06:40:25 +08:00
|
|
|
};
|
2014-11-20 20:36:43 +08:00
|
|
|
|
2015-06-23 17:49:53 +08:00
|
|
|
}
|
2011-12-27 04:37:40 +08:00
|
|
|
|
2008-12-17 04:54:32 +08:00
|
|
|
/// Erase the terminator \p TI from its block, then hand the instruction that
/// produced its controlling value (switch condition, conditional-branch
/// condition or indirectbr address) to
/// RecursivelyDeleteTriviallyDeadInstructions.
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
  // The controlling value must be looked up while TI is still alive.
  Value *Controlling = nullptr;
  if (SwitchInst *Switch = dyn_cast<SwitchInst>(TI)) {
    Controlling = Switch->getCondition();
  } else if (BranchInst *Branch = dyn_cast<BranchInst>(TI)) {
    // Unconditional branches have nothing to clean up.
    if (Branch->isConditional())
      Controlling = Branch->getCondition();
  } else if (IndirectBrInst *IndBr = dyn_cast<IndirectBrInst>(TI)) {
    Controlling = IndBr->getAddress();
  }

  // Only instructions are candidates for deletion (not arguments/constants).
  Instruction *CondInst = dyn_cast_or_null<Instruction>(Controlling);

  TI->eraseFromParent();
  if (CondInst)
    RecursivelyDeleteTriviallyDeadInstructions(CondInst);
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if the specified terminator checks
|
2008-11-28 07:25:44 +08:00
|
|
|
/// to see if a value is equal to constant integer value.
|
2010-02-06 06:03:18 +08:00
|
|
|
Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
|
2014-04-25 13:29:35 +08:00
|
|
|
Value *CV = nullptr;
|
2004-03-17 03:45:22 +08:00
|
|
|
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
|
|
|
|
// Do not permit merging of large switch instructions into their
|
|
|
|
// predecessors unless there is only one predecessor.
|
2010-02-06 06:03:18 +08:00
|
|
|
if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()),
|
|
|
|
pred_end(SI->getParent())) <= 128)
|
|
|
|
CV = SI->getCondition();
|
|
|
|
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
if (BI->isConditional() && BI->getCondition()->hasOneUse())
|
2015-03-10 10:37:25 +08:00
|
|
|
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
|
2014-02-21 08:06:31 +08:00
|
|
|
if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
|
2010-02-06 06:03:18 +08:00
|
|
|
CV = ICI->getOperand(0);
|
2015-03-10 10:37:25 +08:00
|
|
|
}
|
2010-02-06 06:03:18 +08:00
|
|
|
|
|
|
|
// Unwrap any lossless ptrtoint cast.
|
2015-03-10 10:37:25 +08:00
|
|
|
if (CV) {
|
2013-10-22 02:55:08 +08:00
|
|
|
if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
|
|
|
|
Value *Ptr = PTII->getPointerOperand();
|
2015-03-10 10:37:25 +08:00
|
|
|
if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
|
2013-10-22 02:55:08 +08:00
|
|
|
CV = Ptr;
|
|
|
|
}
|
|
|
|
}
|
2010-02-06 06:03:18 +08:00
|
|
|
return CV;
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Given a value comparison instruction,
|
2009-01-20 07:43:56 +08:00
|
|
|
/// decode all of the 'cases' that it represents and return the 'default' block.
|
2010-02-06 06:03:18 +08:00
|
|
|
BasicBlock *SimplifyCFGOpt::
|
2005-04-22 07:48:37 +08:00
|
|
|
GetValueEqualityComparisonCases(TerminatorInst *TI,
|
2012-07-03 07:22:21 +08:00
|
|
|
std::vector<ValueEqualityComparisonCase>
|
|
|
|
&Cases) {
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
|
2012-07-03 07:22:21 +08:00
|
|
|
Cases.reserve(SI->getNumCases());
|
|
|
|
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
|
|
|
|
Cases.push_back(ValueEqualityComparisonCase(i.getCaseValue(),
|
|
|
|
i.getCaseSuccessor()));
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
return SI->getDefaultDest();
|
|
|
|
}
|
2012-07-03 07:22:21 +08:00
|
|
|
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
BranchInst *BI = cast<BranchInst>(TI);
|
2006-12-23 14:05:41 +08:00
|
|
|
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
|
2012-07-03 07:22:21 +08:00
|
|
|
BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
|
|
|
|
Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1),
|
2014-02-21 08:06:31 +08:00
|
|
|
DL),
|
2012-07-03 07:22:21 +08:00
|
|
|
Succ));
|
2006-12-23 14:05:41 +08:00
|
|
|
return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
}
|
|
|
|
|
2012-07-03 07:22:21 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Given a vector of bb/value pairs, remove any entries
|
2012-07-03 07:22:21 +08:00
|
|
|
/// in the list that match the specified block.
|
|
|
|
static void EliminateBlockCases(BasicBlock *BB,
|
|
|
|
std::vector<ValueEqualityComparisonCase> &Cases) {
|
2012-10-14 19:15:42 +08:00
|
|
|
Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
|
2012-07-03 07:22:21 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if there are any keys in C1 that exist in C2 as well.
|
2012-07-03 07:22:21 +08:00
|
|
|
static bool
|
|
|
|
ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
|
|
|
|
std::vector<ValueEqualityComparisonCase > &C2) {
|
|
|
|
std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
|
|
|
|
|
|
|
|
// Make V1 be smaller than V2.
|
|
|
|
if (V1->size() > V2->size())
|
|
|
|
std::swap(V1, V2);
|
|
|
|
|
|
|
|
if (V1->size() == 0) return false;
|
|
|
|
if (V1->size() == 1) {
|
|
|
|
// Just scan V2.
|
|
|
|
ConstantInt *TheVal = (*V1)[0].Value;
|
|
|
|
for (unsigned i = 0, e = V2->size(); i != e; ++i)
|
|
|
|
if (TheVal == (*V2)[i].Value)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, just sort both lists and compare element by element.
|
|
|
|
array_pod_sort(V1->begin(), V1->end());
|
|
|
|
array_pod_sort(V2->begin(), V2->end());
|
|
|
|
unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
|
|
|
|
while (i1 != e1 && i2 != e2) {
|
|
|
|
if ((*V1)[i1].Value == (*V2)[i2].Value)
|
|
|
|
return true;
|
|
|
|
if ((*V1)[i1].Value < (*V2)[i2].Value)
|
|
|
|
++i1;
|
|
|
|
else
|
|
|
|
++i2;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If TI is known to be a terminator instruction and its block is known to
|
|
|
|
/// only have a single predecessor block, check to see if that predecessor is
|
|
|
|
/// also a value comparison with the same value, and if that comparison
|
|
|
|
/// determines the outcome of this comparison. If so, simplify TI. This does a
|
|
|
|
/// very limited form of jump threading.
|
2010-02-06 06:03:18 +08:00
|
|
|
bool SimplifyCFGOpt::
|
|
|
|
SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
|
2011-05-19 04:35:38 +08:00
|
|
|
BasicBlock *Pred,
|
|
|
|
IRBuilder<> &Builder) {
|
2005-02-24 14:17:52 +08:00
|
|
|
Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
|
|
|
|
if (!PredVal) return false; // Not a value comparison in predecessor.
|
|
|
|
|
|
|
|
Value *ThisVal = isValueEqualityComparison(TI);
|
|
|
|
assert(ThisVal && "This isn't a value comparison!!");
|
|
|
|
if (ThisVal != PredVal) return false; // Different predicates.
|
|
|
|
|
2012-08-30 05:46:38 +08:00
|
|
|
// TODO: Preserve branch weight metadata, similarly to how
|
|
|
|
// FoldValueComparisonIntoPredecessors preserves it.
|
|
|
|
|
2005-02-24 14:17:52 +08:00
|
|
|
// Find out information about when control will move from Pred to TI's block.
|
2012-07-03 07:22:21 +08:00
|
|
|
std::vector<ValueEqualityComparisonCase> PredCases;
|
2005-02-24 14:17:52 +08:00
|
|
|
BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
|
|
|
|
PredCases);
|
2012-07-03 07:22:21 +08:00
|
|
|
EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2005-02-24 14:17:52 +08:00
|
|
|
// Find information about how control leaves this block.
|
2012-07-03 07:22:21 +08:00
|
|
|
std::vector<ValueEqualityComparisonCase> ThisCases;
|
2005-02-24 14:17:52 +08:00
|
|
|
BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
|
2012-07-03 07:22:21 +08:00
|
|
|
EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
|
2005-02-24 14:17:52 +08:00
|
|
|
|
|
|
|
// If TI's block is the default block from Pred's comparison, potentially
|
|
|
|
// simplify TI based on this knowledge.
|
|
|
|
if (PredDef == TI->getParent()) {
|
|
|
|
// If we are here, we know that the value is none of those cases listed in
|
|
|
|
// PredCases. If there are any cases in ThisCases that are in PredCases, we
|
|
|
|
// can simplify TI.
|
2012-07-03 07:22:21 +08:00
|
|
|
if (!ValuesOverlap(PredCases, ThisCases))
|
2010-12-13 09:47:07 +08:00
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
if (isa<BranchInst>(TI)) {
|
|
|
|
// Okay, one of the successors of this condbr is dead. Convert it to a
|
|
|
|
// uncond br.
|
|
|
|
assert(ThisCases.size() == 1 && "Branch can only have one case!");
|
|
|
|
// Insert the new branch.
|
2011-05-19 04:35:38 +08:00
|
|
|
Instruction *NI = Builder.CreateBr(ThisDef);
|
2010-12-13 09:47:07 +08:00
|
|
|
(void) NI;
|
2005-02-24 14:17:52 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Remove PHI node entries for the dead edge.
|
2012-07-03 07:22:21 +08:00
|
|
|
ThisCases[0].Dest->removePredecessor(TI->getParent());
|
2005-02-24 14:17:52 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
|
|
|
|
<< "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
|
2005-02-24 14:17:52 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
EraseTerminatorInstAndDCECond(TI);
|
|
|
|
return true;
|
2005-02-24 14:17:52 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
SwitchInst *SI = cast<SwitchInst>(TI);
|
|
|
|
// Okay, TI has cases that are statically dead, prune them away.
|
2012-07-03 07:22:21 +08:00
|
|
|
SmallPtrSet<Constant*, 16> DeadCases;
|
|
|
|
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
|
|
|
|
DeadCases.insert(PredCases[i].Value);
|
2010-12-13 09:47:07 +08:00
|
|
|
|
|
|
|
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
|
|
|
|
<< "Through successor TI: " << *TI);
|
|
|
|
|
2012-09-15 05:53:06 +08:00
|
|
|
// Collect branch weights into a vector.
|
|
|
|
SmallVector<uint32_t, 8> Weights;
|
2014-11-12 05:30:22 +08:00
|
|
|
MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
|
2012-09-15 05:53:06 +08:00
|
|
|
bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
|
|
|
|
if (HasWeight)
|
|
|
|
for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
|
|
|
|
++MD_i) {
|
IR: Split Metadata from Value
Split `Metadata` away from the `Value` class hierarchy, as part of
PR21532. Assembly and bitcode changes are in the wings, but this is the
bulk of the change for the IR C++ API.
I have a follow-up patch prepared for `clang`. If this breaks other
sub-projects, I apologize in advance :(. Help me compile it on Darwin
I'll try to fix it. FWIW, the errors should be easy to fix, so it may
be simpler to just fix it yourself.
This breaks the build for all metadata-related code that's out-of-tree.
Rest assured the transition is mechanical and the compiler should catch
almost all of the problems.
Here's a quick guide for updating your code:
- `Metadata` is the root of a class hierarchy with three main classes:
`MDNode`, `MDString`, and `ValueAsMetadata`. It is distinct from
the `Value` class hierarchy. It is typeless -- i.e., instances do
*not* have a `Type`.
- `MDNode`'s operands are all `Metadata *` (instead of `Value *`).
- `TrackingVH<MDNode>` and `WeakVH` referring to metadata can be
replaced with `TrackingMDNodeRef` and `TrackingMDRef`, respectively.
If you're referring solely to resolved `MDNode`s -- post graph
construction -- just use `MDNode*`.
- `MDNode` (and the rest of `Metadata`) have only limited support for
`replaceAllUsesWith()`.
As long as an `MDNode` is pointing at a forward declaration -- the
result of `MDNode::getTemporary()` -- it maintains a side map of its
uses and can RAUW itself. Once the forward declarations are fully
resolved RAUW support is dropped on the ground. This means that
uniquing collisions on changing operands cause nodes to become
"distinct". (This already happened fairly commonly, whenever an
operand went to null.)
If you're constructing complex (non self-reference) `MDNode` cycles,
you need to call `MDNode::resolveCycles()` on each node (or on a
top-level node that somehow references all of the nodes). Also,
don't do that. Metadata cycles (and the RAUW machinery needed to
construct them) are expensive.
- An `MDNode` can only refer to a `Constant` through a bridge called
`ConstantAsMetadata` (one of the subclasses of `ValueAsMetadata`).
As a side effect, accessing an operand of an `MDNode` that is known
to be, e.g., `ConstantInt`, takes three steps: first, cast from
`Metadata` to `ConstantAsMetadata`; second, extract the `Constant`;
third, cast down to `ConstantInt`.
The eventual goal is to introduce `MDInt`/`MDFloat`/etc. and have
metadata schema owners transition away from using `Constant`s when
the type isn't important (and they don't care about referring to
`GlobalValue`s).
In the meantime, I've added transitional API to the `mdconst`
namespace that matches semantics with the old code, in order to
avoid adding the error-prone three-step equivalent to every call
site. If your old code was:
MDNode *N = foo();
bar(isa <ConstantInt>(N->getOperand(0)));
baz(cast <ConstantInt>(N->getOperand(1)));
bak(cast_or_null <ConstantInt>(N->getOperand(2)));
bat(dyn_cast <ConstantInt>(N->getOperand(3)));
bay(dyn_cast_or_null<ConstantInt>(N->getOperand(4)));
you can trivially match its semantics with:
MDNode *N = foo();
bar(mdconst::hasa <ConstantInt>(N->getOperand(0)));
baz(mdconst::extract <ConstantInt>(N->getOperand(1)));
bak(mdconst::extract_or_null <ConstantInt>(N->getOperand(2)));
bat(mdconst::dyn_extract <ConstantInt>(N->getOperand(3)));
bay(mdconst::dyn_extract_or_null<ConstantInt>(N->getOperand(4)));
and when you transition your metadata schema to `MDInt`:
MDNode *N = foo();
bar(isa <MDInt>(N->getOperand(0)));
baz(cast <MDInt>(N->getOperand(1)));
bak(cast_or_null <MDInt>(N->getOperand(2)));
bat(dyn_cast <MDInt>(N->getOperand(3)));
bay(dyn_cast_or_null<MDInt>(N->getOperand(4)));
- A `CallInst` -- specifically, intrinsic instructions -- can refer to
metadata through a bridge called `MetadataAsValue`. This is a
subclass of `Value` where `getType()->isMetadataTy()`.
`MetadataAsValue` is the *only* class that can legally refer to a
`LocalAsMetadata`, which is a bridged form of non-`Constant` values
like `Argument` and `Instruction`. It can also refer to any other
`Metadata` subclass.
(I'll break all your testcases in a follow-up commit, when I propagate
this change to assembly.)
llvm-svn: 223802
2014-12-10 02:38:53 +08:00
|
|
|
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
|
2012-09-15 05:53:06 +08:00
|
|
|
Weights.push_back(CI->getValue().getZExtValue());
|
|
|
|
}
|
2012-07-03 07:22:21 +08:00
|
|
|
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
|
|
|
|
--i;
|
|
|
|
if (DeadCases.count(i.getCaseValue())) {
|
2012-09-15 05:53:06 +08:00
|
|
|
if (HasWeight) {
|
|
|
|
std::swap(Weights[i.getCaseIndex()+1], Weights.back());
|
|
|
|
Weights.pop_back();
|
|
|
|
}
|
2012-07-03 07:22:21 +08:00
|
|
|
i.getCaseSuccessor()->removePredecessor(TI->getParent());
|
|
|
|
SI->removeCase(i);
|
|
|
|
}
|
|
|
|
}
|
2012-10-12 06:28:34 +08:00
|
|
|
if (HasWeight && Weights.size() >= 2)
|
2012-09-15 05:53:06 +08:00
|
|
|
SI->setMetadata(LLVMContext::MD_prof,
|
|
|
|
MDBuilder(SI->getParent()->getContext()).
|
|
|
|
createBranchWeights(Weights));
|
2012-07-03 07:22:21 +08:00
|
|
|
|
|
|
|
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
|
2010-12-13 09:47:07 +08:00
|
|
|
return true;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Otherwise, TI's block must correspond to some matched value. Find out
|
|
|
|
// which value (or set of values) this is.
|
2014-04-25 13:29:35 +08:00
|
|
|
ConstantInt *TIV = nullptr;
|
2010-12-13 09:47:07 +08:00
|
|
|
BasicBlock *TIBB = TI->getParent();
|
2012-07-03 07:22:21 +08:00
|
|
|
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
|
|
|
|
if (PredCases[i].Dest == TIBB) {
|
2014-04-25 13:29:35 +08:00
|
|
|
if (TIV)
|
2012-07-03 07:22:21 +08:00
|
|
|
return false; // Cannot handle multiple values coming to this block.
|
|
|
|
TIV = PredCases[i].Value;
|
|
|
|
}
|
|
|
|
assert(TIV && "No edge from pred to succ?");
|
2010-12-13 09:47:07 +08:00
|
|
|
|
|
|
|
// Okay, we found the one constant that our value can be if we get into TI's
|
|
|
|
// BB. Find out which successor will unconditionally be branched to.
|
2014-04-25 13:29:35 +08:00
|
|
|
BasicBlock *TheRealDest = nullptr;
|
2012-07-03 07:22:21 +08:00
|
|
|
for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
|
|
|
|
if (ThisCases[i].Value == TIV) {
|
|
|
|
TheRealDest = ThisCases[i].Dest;
|
|
|
|
break;
|
|
|
|
}
|
2010-12-13 09:47:07 +08:00
|
|
|
|
|
|
|
// If not handled by any explicit cases, it is handled by the default case.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!TheRealDest) TheRealDest = ThisDef;
|
2005-02-24 14:17:52 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Remove PHI node entries for dead edges.
|
|
|
|
BasicBlock *CheckEdge = TheRealDest;
|
2014-07-22 01:06:51 +08:00
|
|
|
for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
|
|
|
|
if (*SI != CheckEdge)
|
|
|
|
(*SI)->removePredecessor(TIBB);
|
2010-12-13 09:47:07 +08:00
|
|
|
else
|
2014-04-25 13:29:35 +08:00
|
|
|
CheckEdge = nullptr;
|
2005-02-24 14:17:52 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Insert the new branch.
|
2011-05-19 04:35:38 +08:00
|
|
|
Instruction *NI = Builder.CreateBr(TheRealDest);
|
2010-12-13 09:47:07 +08:00
|
|
|
(void) NI;
|
2005-02-24 14:17:52 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
|
|
|
|
<< "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
|
2005-02-24 14:17:52 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
EraseTerminatorInstAndDCECond(TI);
|
|
|
|
return true;
|
2005-02-24 14:17:52 +08:00
|
|
|
}
|
|
|
|
|
2009-03-13 05:01:11 +08:00
|
|
|
namespace {
|
2015-06-25 04:40:57 +08:00
|
|
|
/// This class implements a stable ordering of constant
|
2009-03-13 05:01:11 +08:00
|
|
|
/// integers that does not depend on their address. This is important for
|
|
|
|
/// applications that sort ConstantInt's to ensure uniqueness.
|
|
|
|
struct ConstantIntOrdering {
|
|
|
|
bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
|
|
|
|
return LHS->getValue().ult(RHS->getValue());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
2009-03-12 09:00:26 +08:00
|
|
|
|
2013-09-22 22:09:50 +08:00
|
|
|
/// Three-way comparison of two ConstantInts by unsigned value, in the
/// pointer-to-element form used by qsort-style sorts.
///
/// Note the sign convention: it returns 1 when *P1 is the smaller value and
/// -1 when it is the larger one, i.e. it yields a descending order.
static int ConstantIntSortPredicate(ConstantInt *const *P1,
                                    ConstantInt *const *P2) {
  const APInt &First = (*P1)->getValue();
  const APInt &Second = (*P2)->getValue();
  if (First == Second)
    return 0;
  return First.ult(Second) ? 1 : -1;
}
|
|
|
|
|
2012-08-30 05:46:38 +08:00
|
|
|
static inline bool HasBranchWeights(const Instruction* I) {
|
2014-11-12 05:30:22 +08:00
|
|
|
MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
|
2012-08-30 05:46:38 +08:00
|
|
|
if (ProfMD && ProfMD->getOperand(0))
|
|
|
|
if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
|
|
|
|
return MDS->getString().equals("branch_weights");
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-09-12 01:43:35 +08:00
|
|
|
/// Get Weights of a given TerminatorInst, the default weight is at the front
|
|
|
|
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
|
|
|
|
/// metadata.
|
|
|
|
static void GetBranchWeights(TerminatorInst *TI,
|
|
|
|
SmallVectorImpl<uint64_t> &Weights) {
|
2014-11-12 05:30:22 +08:00
|
|
|
MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
|
2012-09-12 01:43:35 +08:00
|
|
|
assert(MD);
|
|
|
|
for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
|
IR: Split Metadata from Value
Split `Metadata` away from the `Value` class hierarchy, as part of
PR21532. Assembly and bitcode changes are in the wings, but this is the
bulk of the change for the IR C++ API.
I have a follow-up patch prepared for `clang`. If this breaks other
sub-projects, I apologize in advance :(. Help me compile it on Darwin
I'll try to fix it. FWIW, the errors should be easy to fix, so it may
be simpler to just fix it yourself.
This breaks the build for all metadata-related code that's out-of-tree.
Rest assured the transition is mechanical and the compiler should catch
almost all of the problems.
Here's a quick guide for updating your code:
- `Metadata` is the root of a class hierarchy with three main classes:
`MDNode`, `MDString`, and `ValueAsMetadata`. It is distinct from
the `Value` class hierarchy. It is typeless -- i.e., instances do
*not* have a `Type`.
- `MDNode`'s operands are all `Metadata *` (instead of `Value *`).
- `TrackingVH<MDNode>` and `WeakVH` referring to metadata can be
replaced with `TrackingMDNodeRef` and `TrackingMDRef`, respectively.
If you're referring solely to resolved `MDNode`s -- post graph
construction -- just use `MDNode*`.
- `MDNode` (and the rest of `Metadata`) have only limited support for
`replaceAllUsesWith()`.
As long as an `MDNode` is pointing at a forward declaration -- the
result of `MDNode::getTemporary()` -- it maintains a side map of its
uses and can RAUW itself. Once the forward declarations are fully
resolved RAUW support is dropped on the ground. This means that
uniquing collisions on changing operands cause nodes to become
"distinct". (This already happened fairly commonly, whenever an
operand went to null.)
If you're constructing complex (non self-reference) `MDNode` cycles,
you need to call `MDNode::resolveCycles()` on each node (or on a
top-level node that somehow references all of the nodes). Also,
don't do that. Metadata cycles (and the RAUW machinery needed to
construct them) are expensive.
- An `MDNode` can only refer to a `Constant` through a bridge called
`ConstantAsMetadata` (one of the subclasses of `ValueAsMetadata`).
As a side effect, accessing an operand of an `MDNode` that is known
to be, e.g., `ConstantInt`, takes three steps: first, cast from
`Metadata` to `ConstantAsMetadata`; second, extract the `Constant`;
third, cast down to `ConstantInt`.
The eventual goal is to introduce `MDInt`/`MDFloat`/etc. and have
metadata schema owners transition away from using `Constant`s when
the type isn't important (and they don't care about referring to
`GlobalValue`s).
In the meantime, I've added transitional API to the `mdconst`
namespace that matches semantics with the old code, in order to
avoid adding the error-prone three-step equivalent to every call
site. If your old code was:
MDNode *N = foo();
bar(isa <ConstantInt>(N->getOperand(0)));
baz(cast <ConstantInt>(N->getOperand(1)));
bak(cast_or_null <ConstantInt>(N->getOperand(2)));
bat(dyn_cast <ConstantInt>(N->getOperand(3)));
bay(dyn_cast_or_null<ConstantInt>(N->getOperand(4)));
you can trivially match its semantics with:
MDNode *N = foo();
bar(mdconst::hasa <ConstantInt>(N->getOperand(0)));
baz(mdconst::extract <ConstantInt>(N->getOperand(1)));
bak(mdconst::extract_or_null <ConstantInt>(N->getOperand(2)));
bat(mdconst::dyn_extract <ConstantInt>(N->getOperand(3)));
bay(mdconst::dyn_extract_or_null<ConstantInt>(N->getOperand(4)));
and when you transition your metadata schema to `MDInt`:
MDNode *N = foo();
bar(isa <MDInt>(N->getOperand(0)));
baz(cast <MDInt>(N->getOperand(1)));
bak(cast_or_null <MDInt>(N->getOperand(2)));
bat(dyn_cast <MDInt>(N->getOperand(3)));
bay(dyn_cast_or_null<MDInt>(N->getOperand(4)));
- A `CallInst` -- specifically, intrinsic instructions -- can refer to
metadata through a bridge called `MetadataAsValue`. This is a
subclass of `Value` where `getType()->isMetadataTy()`.
`MetadataAsValue` is the *only* class that can legally refer to a
`LocalAsMetadata`, which is a bridged form of non-`Constant` values
like `Argument` and `Instruction`. It can also refer to any other
`Metadata` subclass.
(I'll break all your testcases in a follow-up commit, when I propagate
this change to assembly.)
llvm-svn: 223802
2014-12-10 02:38:53 +08:00
|
|
|
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
|
2012-09-12 01:43:35 +08:00
|
|
|
Weights.push_back(CI->getValue().getZExtValue());
|
2012-08-30 05:46:38 +08:00
|
|
|
}
|
|
|
|
|
2012-09-12 01:43:35 +08:00
|
|
|
// If TI is a conditional eq, the default case is the false case,
|
|
|
|
// and the corresponding branch-weight data is at index 2. We swap the
|
|
|
|
// default weight to be the first entry.
|
|
|
|
if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
|
|
|
|
assert(Weights.size() == 2);
|
|
|
|
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
|
|
|
|
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
|
|
|
|
std::swap(Weights.front(), Weights.back());
|
2012-08-30 05:46:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-28 07:39:03 +08:00
|
|
|
/// Keep halving the weights until all can fit in uint32_t.
|
2012-08-30 05:46:38 +08:00
|
|
|
static void FitWeights(MutableArrayRef<uint64_t> Weights) {
|
2014-03-09 22:42:55 +08:00
|
|
|
uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
|
|
|
|
if (Max > UINT_MAX) {
|
|
|
|
unsigned Offset = 32 - countLeadingZeros(Max);
|
|
|
|
for (uint64_t &I : Weights)
|
|
|
|
I >>= Offset;
|
2014-01-28 07:39:03 +08:00
|
|
|
}
|
2012-08-30 05:46:38 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// The specified terminator is a value equality comparison instruction
|
|
|
|
/// (either a switch or a branch on "X == c").
|
2009-01-20 07:43:56 +08:00
|
|
|
/// See if any of the predecessors of the terminator block are value comparisons
|
|
|
|
/// on the same value. If so, and if safe to do so, fold them together.
|
2011-05-19 04:53:17 +08:00
|
|
|
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
|
|
|
|
IRBuilder<> &Builder) {
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
BasicBlock *BB = TI->getParent();
|
|
|
|
Value *CV = isValueEqualityComparison(TI); // CondVal
|
|
|
|
assert(CV && "Not a comparison?");
|
|
|
|
bool Changed = false;
|
|
|
|
|
2008-02-18 15:42:56 +08:00
|
|
|
SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
while (!Preds.empty()) {
|
2009-05-07 01:22:41 +08:00
|
|
|
BasicBlock *Pred = Preds.pop_back_val();
|
2005-04-22 07:48:37 +08:00
|
|
|
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
// See if the predecessor is a comparison with the same value.
|
|
|
|
TerminatorInst *PTI = Pred->getTerminator();
|
|
|
|
Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
|
|
|
|
|
|
|
|
if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
|
|
|
|
// Figure out which 'cases' to copy from SI to PSI.
|
2012-07-03 07:22:21 +08:00
|
|
|
std::vector<ValueEqualityComparisonCase> BBCases;
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
|
|
|
|
|
2012-07-03 07:22:21 +08:00
|
|
|
std::vector<ValueEqualityComparisonCase> PredCases;
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
|
|
|
|
|
|
|
|
// Based on whether the default edge from PTI goes to BB or not, fill in
|
|
|
|
// PredCases and PredDefault with the new switch cases we would like to
|
|
|
|
// build.
|
2008-02-18 15:42:56 +08:00
|
|
|
SmallVector<BasicBlock*, 8> NewSuccessors;
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
|
2012-08-30 05:46:38 +08:00
|
|
|
// Update the branch weight metadata along the way
|
|
|
|
SmallVector<uint64_t, 8> Weights;
|
|
|
|
bool PredHasWeights = HasBranchWeights(PTI);
|
|
|
|
bool SuccHasWeights = HasBranchWeights(TI);
|
|
|
|
|
2012-09-15 03:05:19 +08:00
|
|
|
if (PredHasWeights) {
|
2012-09-12 01:43:35 +08:00
|
|
|
GetBranchWeights(PTI, Weights);
|
2012-11-16 02:40:31 +08:00
|
|
|
// branch-weight metadata is inconsistent here.
|
2012-09-15 03:05:19 +08:00
|
|
|
if (Weights.size() != 1 + PredCases.size())
|
|
|
|
PredHasWeights = SuccHasWeights = false;
|
|
|
|
} else if (SuccHasWeights)
|
2012-08-30 05:46:38 +08:00
|
|
|
// If there are no predecessor weights but there are successor weights,
|
|
|
|
// populate Weights with 1, which will later be scaled to the sum of
|
|
|
|
// successor's weights
|
|
|
|
Weights.assign(1 + PredCases.size(), 1);
|
|
|
|
|
2012-09-12 01:43:35 +08:00
|
|
|
SmallVector<uint64_t, 8> SuccWeights;
|
2012-09-15 03:05:19 +08:00
|
|
|
if (SuccHasWeights) {
|
2012-09-12 01:43:35 +08:00
|
|
|
GetBranchWeights(TI, SuccWeights);
|
2012-11-16 02:40:31 +08:00
|
|
|
// branch-weight metadata is inconsistent here.
|
2012-09-15 03:05:19 +08:00
|
|
|
if (SuccWeights.size() != 1 + BBCases.size())
|
|
|
|
PredHasWeights = SuccHasWeights = false;
|
|
|
|
} else if (PredHasWeights)
|
2012-09-12 01:43:35 +08:00
|
|
|
SuccWeights.assign(1 + BBCases.size(), 1);
|
2012-08-30 05:46:38 +08:00
|
|
|
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
if (PredDefault == BB) {
|
|
|
|
// If this is the default destination from PTI, only the edges in TI
|
|
|
|
// that don't occur in PTI, or that branch to BB will be activated.
|
2012-07-03 07:22:21 +08:00
|
|
|
std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
|
|
|
|
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
|
|
|
|
if (PredCases[i].Dest != BB)
|
|
|
|
PTIHandled.insert(PredCases[i].Value);
|
|
|
|
else {
|
|
|
|
// The default destination is BB, we don't need explicit targets.
|
|
|
|
std::swap(PredCases[i], PredCases.back());
|
2012-08-30 05:46:38 +08:00
|
|
|
|
2012-09-12 01:43:35 +08:00
|
|
|
if (PredHasWeights || SuccHasWeights) {
|
|
|
|
// Increase weight for the default case.
|
|
|
|
Weights[0] += Weights[i+1];
|
2012-08-30 05:46:38 +08:00
|
|
|
std::swap(Weights[i+1], Weights.back());
|
|
|
|
Weights.pop_back();
|
|
|
|
}
|
|
|
|
|
2012-07-03 07:22:21 +08:00
|
|
|
PredCases.pop_back();
|
|
|
|
--i; --e;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reconstruct the new switch statement we will be building.
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
if (PredDefault != BBDefault) {
|
|
|
|
PredDefault->removePredecessor(Pred);
|
|
|
|
PredDefault = BBDefault;
|
|
|
|
NewSuccessors.push_back(BBDefault);
|
|
|
|
}
|
2012-08-30 05:46:38 +08:00
|
|
|
|
2012-09-12 01:43:35 +08:00
|
|
|
unsigned CasesFromPred = Weights.size();
|
|
|
|
uint64_t ValidTotalSuccWeight = 0;
|
2012-07-03 07:22:21 +08:00
|
|
|
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
|
|
|
|
if (!PTIHandled.count(BBCases[i].Value) &&
|
|
|
|
BBCases[i].Dest != BBDefault) {
|
|
|
|
PredCases.push_back(BBCases[i]);
|
|
|
|
NewSuccessors.push_back(BBCases[i].Dest);
|
2012-09-12 01:43:35 +08:00
|
|
|
if (SuccHasWeights || PredHasWeights) {
|
|
|
|
// The default weight is at index 0, so weight for the ith case
|
|
|
|
// should be at index i+1. Scale the cases from successor by
|
|
|
|
// PredDefaultWeight (Weights[0]).
|
|
|
|
Weights.push_back(Weights[0] * SuccWeights[i+1]);
|
|
|
|
ValidTotalSuccWeight += SuccWeights[i+1];
|
2012-08-30 05:46:38 +08:00
|
|
|
}
|
2012-07-03 07:22:21 +08:00
|
|
|
}
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
|
2012-09-12 01:43:35 +08:00
|
|
|
if (SuccHasWeights || PredHasWeights) {
|
|
|
|
ValidTotalSuccWeight += SuccWeights[0];
|
|
|
|
// Scale the cases from predecessor by ValidTotalSuccWeight.
|
|
|
|
for (unsigned i = 1; i < CasesFromPred; ++i)
|
|
|
|
Weights[i] *= ValidTotalSuccWeight;
|
|
|
|
// Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
|
|
|
|
Weights[0] *= SuccWeights[0];
|
|
|
|
}
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
} else {
|
|
|
|
// If this is not the default destination from PSI, only the edges
|
|
|
|
// in SI that occur in PSI with a destination of BB will be
|
|
|
|
// activated.
|
2012-07-03 07:22:21 +08:00
|
|
|
std::set<ConstantInt*, ConstantIntOrdering> PTIHandled;
|
2012-09-15 01:29:56 +08:00
|
|
|
std::map<ConstantInt*, uint64_t> WeightsForHandled;
|
2012-07-03 07:22:21 +08:00
|
|
|
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
|
|
|
|
if (PredCases[i].Dest == BB) {
|
|
|
|
PTIHandled.insert(PredCases[i].Value);
|
2012-09-15 01:29:56 +08:00
|
|
|
|
|
|
|
if (PredHasWeights || SuccHasWeights) {
|
|
|
|
WeightsForHandled[PredCases[i].Value] = Weights[i+1];
|
|
|
|
std::swap(Weights[i+1], Weights.back());
|
|
|
|
Weights.pop_back();
|
|
|
|
}
|
|
|
|
|
2012-07-03 07:22:21 +08:00
|
|
|
std::swap(PredCases[i], PredCases.back());
|
|
|
|
PredCases.pop_back();
|
|
|
|
--i; --e;
|
|
|
|
}
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
|
|
|
|
// Okay, now we know which constants were sent to BB from the
|
|
|
|
// predecessor. Figure out where they will all go now.
|
2012-07-03 07:22:21 +08:00
|
|
|
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
|
|
|
|
if (PTIHandled.count(BBCases[i].Value)) {
|
|
|
|
// If this is one we are capable of getting...
|
2012-09-15 01:29:56 +08:00
|
|
|
if (PredHasWeights || SuccHasWeights)
|
|
|
|
Weights.push_back(WeightsForHandled[BBCases[i].Value]);
|
2012-07-03 07:22:21 +08:00
|
|
|
PredCases.push_back(BBCases[i]);
|
|
|
|
NewSuccessors.push_back(BBCases[i].Dest);
|
|
|
|
PTIHandled.erase(BBCases[i].Value);// This constant is taken care of
|
|
|
|
}
|
|
|
|
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
// If there are any constants vectored to BB that TI doesn't handle,
|
|
|
|
// they must go to the default destination of TI.
|
2012-08-30 05:46:36 +08:00
|
|
|
for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
|
2012-07-03 07:22:21 +08:00
|
|
|
PTIHandled.begin(),
|
|
|
|
E = PTIHandled.end(); I != E; ++I) {
|
2012-11-16 02:40:29 +08:00
|
|
|
if (PredHasWeights || SuccHasWeights)
|
|
|
|
Weights.push_back(WeightsForHandled[*I]);
|
2012-07-03 07:22:21 +08:00
|
|
|
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
NewSuccessors.push_back(BBDefault);
|
2012-07-03 07:22:21 +08:00
|
|
|
}
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Okay, at this point, we know which new successor Pred will get. Make
|
|
|
|
// sure we update the number of entries in the PHI nodes for these
|
|
|
|
// successors.
|
|
|
|
for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
|
|
|
|
AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
|
|
|
|
|
2011-05-19 04:53:17 +08:00
|
|
|
Builder.SetInsertPoint(PTI);
|
2010-02-06 06:03:18 +08:00
|
|
|
// Convert pointer to int before we switch.
|
2010-02-16 19:11:14 +08:00
|
|
|
if (CV->getType()->isPointerTy()) {
|
2015-03-10 10:37:25 +08:00
|
|
|
CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
|
2011-05-19 04:53:17 +08:00
|
|
|
"magicptr");
|
2010-02-06 06:03:18 +08:00
|
|
|
}
|
|
|
|
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
// Now that the successors are updated, create the new Switch instruction.
|
2011-05-19 04:53:17 +08:00
|
|
|
SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault,
|
|
|
|
PredCases.size());
|
2011-05-18 07:29:05 +08:00
|
|
|
NewSI->setDebugLoc(PTI->getDebugLoc());
|
2012-07-03 07:22:21 +08:00
|
|
|
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
|
|
|
|
NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
|
2005-01-02 00:02:12 +08:00
|
|
|
|
2012-08-30 05:46:38 +08:00
|
|
|
if (PredHasWeights || SuccHasWeights) {
|
|
|
|
// Halve the weights if any of them cannot fit in an uint32_t
|
|
|
|
FitWeights(Weights);
|
|
|
|
|
|
|
|
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
|
|
|
|
|
|
|
|
NewSI->setMetadata(LLVMContext::MD_prof,
|
|
|
|
MDBuilder(BB->getContext()).
|
|
|
|
createBranchWeights(MDWeights));
|
|
|
|
}
|
|
|
|
|
2008-12-17 04:54:32 +08:00
|
|
|
EraseTerminatorInstAndDCECond(PTI);
|
2005-01-02 00:02:12 +08:00
|
|
|
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
// Okay, last check. If BB is still a successor of PSI, then we must
|
|
|
|
// have an infinite loop case. If so, add an infinitely looping block
|
|
|
|
// to handle the case to preserve the behavior of the code.
|
2014-04-25 13:29:35 +08:00
|
|
|
BasicBlock *InfLoopBlock = nullptr;
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
|
|
|
|
if (NewSI->getSuccessor(i) == BB) {
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!InfLoopBlock) {
|
2008-07-14 06:23:11 +08:00
|
|
|
// Insert it at the end of the function, because it's either code,
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
// or it won't matter if it's hot. :)
|
2009-08-14 05:58:54 +08:00
|
|
|
InfLoopBlock = BasicBlock::Create(BB->getContext(),
|
|
|
|
"infloop", BB->getParent());
|
2008-04-07 04:25:17 +08:00
|
|
|
BranchInst::Create(InfLoopBlock, InfLoopBlock);
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
}
|
|
|
|
NewSI->setSuccessor(i, InfLoopBlock);
|
|
|
|
}
|
2005-04-22 07:48:37 +08:00
|
|
|
|
Implement switch->br and br->switch folding by ripping out the switch->switch
and br->br code and generalizing it. This allows us to compile code like this:
int test(Instruction *I) {
if (isa<CastInst>(I))
return foo(7);
else if (isa<BranchInst>(I))
return foo(123);
else if (isa<UnwindInst>(I))
return foo(1241);
else if (isa<SetCondInst>(I))
return foo(1);
else if (isa<VAArgInst>(I))
return foo(42);
return foo(-1);
}
into:
int %_Z4testPN4llvm11InstructionE("struct.llvm::Instruction"* %I) {
entry:
%tmp.1.i.i.i.i.i.i.i = getelementptr "struct.llvm::Instruction"* %I, long 0, ubyte 4 ; <uint*> [#uses=1]
%tmp.2.i.i.i.i.i.i.i = load uint* %tmp.1.i.i.i.i.i.i.i ; <uint> [#uses=2]
%tmp.2.i.i.i.i.i.i = seteq uint %tmp.2.i.i.i.i.i.i.i, 27 ; <bool> [#uses=0]
switch uint %tmp.2.i.i.i.i.i.i.i, label %endif.0 [
uint 27, label %then.0
uint 2, label %then.1
uint 5, label %then.2
uint 14, label %then.3
uint 15, label %then.3
uint 16, label %then.3
uint 17, label %then.3
uint 18, label %then.3
uint 19, label %then.3
uint 32, label %then.4
]
...
As well as handling the cases in 176.gcc and many other programs more effectively.
llvm-svn: 11964
2004-02-29 05:28:10 +08:00
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// If we would need to insert a select that uses the value of this invoke
|
|
|
|
// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
|
|
|
|
// can't hoist the invoke, as there is nowhere to put the select in this case.
|
2009-06-16 04:59:27 +08:00
|
|
|
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
|
|
|
|
Instruction *I1, Instruction *I2) {
|
2014-07-22 01:06:51 +08:00
|
|
|
for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
|
2009-06-16 04:59:27 +08:00
|
|
|
PHINode *PN;
|
2014-07-22 01:06:51 +08:00
|
|
|
for (BasicBlock::iterator BBI = SI->begin();
|
2009-06-16 04:59:27 +08:00
|
|
|
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
|
|
|
|
Value *BB1V = PN->getIncomingValueForBlock(BB1);
|
|
|
|
Value *BB2V = PN->getIncomingValueForBlock(BB2);
|
|
|
|
if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
SimplifyCFG: Don't convert phis into selects if we could remove undef behavior
instead
We used to transform this:
define void @test6(i1 %cond, i8* %ptr) {
entry:
br i1 %cond, label %bb1, label %bb2
bb1:
br label %bb2
bb2:
%ptr.2 = phi i8* [ %ptr, %entry ], [ null, %bb1 ]
store i8 2, i8* %ptr.2, align 8
ret void
}
into this:
define void @test6(i1 %cond, i8* %ptr) {
%ptr.2 = select i1 %cond, i8* null, i8* %ptr
store i8 2, i8* %ptr.2, align 8
ret void
}
because the simplifycfg transformation into selects would happen to happen
before the simplifycfg transformation that removes unreachable control flow
(We have 'unreachable control flow' due to the store to null which is undefined
behavior).
The existing transformation that removes unreachable control flow in simplifycfg
is:
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB)
Now we generate:
define void @test6(i1 %cond, i8* %ptr) {
store i8 2, i8* %ptr.2, align 8
ret void
}
I did not see any impact on the test-suite + externals.
rdar://18596215
llvm-svn: 219462
2014-10-10 09:27:02 +08:00
|
|
|
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
|
|
|
|
/// in the two blocks up into the branch block. The caller of this function
|
|
|
|
/// guarantees that BI's block dominates BB1 and BB2.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool HoistThenElseCodeToIf(BranchInst *BI,
|
2015-02-24 03:15:16 +08:00
|
|
|
const TargetTransformInfo &TTI) {
|
2004-11-30 08:29:14 +08:00
|
|
|
// This does very trivial matching, with limited scanning, to find identical
|
|
|
|
// instructions in the two blocks. In particular, we don't want to get into
|
|
|
|
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
|
|
|
|
// such, we currently just scan for obviously identical instructions in an
|
|
|
|
// identical order.
|
|
|
|
BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
|
|
|
|
BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
|
|
|
|
|
2009-02-04 08:03:08 +08:00
|
|
|
BasicBlock::iterator BB1_Itr = BB1->begin();
|
|
|
|
BasicBlock::iterator BB2_Itr = BB2->begin();
|
|
|
|
|
|
|
|
Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++;
|
2011-04-08 01:27:36 +08:00
|
|
|
// Skip debug info if it is not identical.
|
|
|
|
DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
|
|
|
|
DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
|
|
|
|
if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
|
|
|
|
while (isa<DbgInfoIntrinsic>(I1))
|
|
|
|
I1 = BB1_Itr++;
|
|
|
|
while (isa<DbgInfoIntrinsic>(I2))
|
|
|
|
I2 = BB2_Itr++;
|
|
|
|
}
|
2011-04-07 08:30:15 +08:00
|
|
|
if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
|
2009-06-16 04:59:27 +08:00
|
|
|
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
|
2004-11-30 08:29:14 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
BasicBlock *BIParent = BI->getParent();
|
|
|
|
|
2013-06-04 04:43:12 +08:00
|
|
|
bool Changed = false;
|
2004-11-30 08:29:14 +08:00
|
|
|
do {
|
|
|
|
// If we are hoisting the terminator instruction, don't move one (making a
|
|
|
|
// broken BB), instead clone it, and remove BI.
|
|
|
|
if (isa<TerminatorInst>(I1))
|
|
|
|
goto HoistTerminator;
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2015-02-24 03:15:16 +08:00
|
|
|
if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
|
|
|
|
return Changed;
|
|
|
|
|
2004-11-30 08:29:14 +08:00
|
|
|
// For a normal instruction, we just move one to right before the branch,
|
|
|
|
// then replace all uses of the other with the first. Finally, we remove
|
|
|
|
// the now redundant second instruction.
|
|
|
|
BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
|
|
|
|
if (!I2->use_empty())
|
|
|
|
I2->replaceAllUsesWith(I1);
|
2009-08-26 06:11:20 +08:00
|
|
|
I1->intersectOptionalDataWith(I2);
|
2014-08-15 23:46:38 +08:00
|
|
|
unsigned KnownIDs[] = {
|
|
|
|
LLVMContext::MD_tbaa,
|
|
|
|
LLVMContext::MD_range,
|
|
|
|
LLVMContext::MD_fpmath,
|
2014-10-23 00:37:13 +08:00
|
|
|
LLVMContext::MD_invariant_load,
|
|
|
|
LLVMContext::MD_nonnull
|
2014-08-15 23:46:38 +08:00
|
|
|
};
|
|
|
|
combineMetadata(I1, I2, KnownIDs);
|
2010-12-14 14:17:25 +08:00
|
|
|
I2->eraseFromParent();
|
2013-06-04 04:43:12 +08:00
|
|
|
Changed = true;
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2009-02-04 08:03:08 +08:00
|
|
|
I1 = BB1_Itr++;
|
|
|
|
I2 = BB2_Itr++;
|
2011-04-08 01:27:36 +08:00
|
|
|
// Skip debug info if it is not identical.
|
|
|
|
DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
|
|
|
|
DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
|
|
|
|
if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
|
|
|
|
while (isa<DbgInfoIntrinsic>(I1))
|
|
|
|
I1 = BB1_Itr++;
|
|
|
|
while (isa<DbgInfoIntrinsic>(I2))
|
|
|
|
I2 = BB2_Itr++;
|
|
|
|
}
|
2011-04-07 08:30:15 +08:00
|
|
|
} while (I1->isIdenticalToWhenDefined(I2));
|
2004-11-30 08:29:14 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
HoistTerminator:
|
2009-06-16 04:59:27 +08:00
|
|
|
// It may not be possible to hoist an invoke.
|
|
|
|
if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
|
2013-06-04 04:43:12 +08:00
|
|
|
return Changed;
|
|
|
|
|
2014-07-22 01:06:51 +08:00
|
|
|
for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
|
2013-06-04 04:43:12 +08:00
|
|
|
PHINode *PN;
|
2014-07-22 01:06:51 +08:00
|
|
|
for (BasicBlock::iterator BBI = SI->begin();
|
2013-06-04 04:43:12 +08:00
|
|
|
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
|
|
|
|
Value *BB1V = PN->getIncomingValueForBlock(BB1);
|
|
|
|
Value *BB2V = PN->getIncomingValueForBlock(BB2);
|
|
|
|
if (BB1V == BB2V)
|
|
|
|
continue;
|
|
|
|
|
SimplifyCFG: Don't convert phis into selects if we could remove undef behavior
instead
We used to transform this:
define void @test6(i1 %cond, i8* %ptr) {
entry:
br i1 %cond, label %bb1, label %bb2
bb1:
br label %bb2
bb2:
%ptr.2 = phi i8* [ %ptr, %entry ], [ null, %bb1 ]
store i8 2, i8* %ptr.2, align 8
ret void
}
into this:
define void @test6(i1 %cond, i8* %ptr) {
%ptr.2 = select i1 %cond, i8* null, i8* %ptr
store i8 2, i8* %ptr.2, align 8
ret void
}
because the simplifycfg transformation into selects would happen to happen
before the simplifycfg transformation that removes unreachable control flow
(We have 'unreachable control flow' due to the store to null which is undefined
behavior).
The existing transformation that removes unreachable control flow in simplifycfg
is:
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB)
Now we generate:
define void @test6(i1 %cond, i8* %ptr) {
store i8 2, i8* %ptr.2, align 8
ret void
}
I did not see any impact on the test-suite + externals.
rdar://18596215
llvm-svn: 219462
2014-10-10 09:27:02 +08:00
|
|
|
// Check for passingValueIsAlwaysUndefined here because we would rather
|
|
|
|
// eliminate undefined control flow then converting it to a select.
|
|
|
|
if (passingValueIsAlwaysUndefined(BB1V, PN) ||
|
|
|
|
passingValueIsAlwaysUndefined(BB2V, PN))
|
|
|
|
return Changed;
|
|
|
|
|
2015-03-10 10:37:25 +08:00
|
|
|
if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
|
2013-06-04 04:43:12 +08:00
|
|
|
return Changed;
|
2015-03-10 10:37:25 +08:00
|
|
|
if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
|
2013-06-04 04:43:12 +08:00
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
}
|
2009-06-16 04:59:27 +08:00
|
|
|
|
2004-11-30 08:29:14 +08:00
|
|
|
// Okay, it is safe to hoist the terminator.
|
2009-09-27 15:38:41 +08:00
|
|
|
Instruction *NT = I1->clone();
|
2004-11-30 08:29:14 +08:00
|
|
|
BIParent->getInstList().insert(BI, NT);
|
2010-01-05 21:12:22 +08:00
|
|
|
if (!NT->getType()->isVoidTy()) {
|
2004-11-30 08:29:14 +08:00
|
|
|
I1->replaceAllUsesWith(NT);
|
|
|
|
I2->replaceAllUsesWith(NT);
|
2007-02-11 09:37:51 +08:00
|
|
|
NT->takeName(I1);
|
2004-11-30 08:29:14 +08:00
|
|
|
}
|
|
|
|
|
2011-05-20 04:52:46 +08:00
|
|
|
IRBuilder<true, NoFolder> Builder(NT);
|
2004-11-30 08:29:14 +08:00
|
|
|
// Hoisting one of the terminators from our successor is a great thing.
|
|
|
|
// Unfortunately, the successors of the if/else blocks may have PHI nodes in
|
|
|
|
// them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
|
|
|
|
// nodes, so we insert select instruction to compute the final result.
|
|
|
|
std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
|
2014-07-22 01:06:51 +08:00
|
|
|
for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
|
2004-11-30 08:29:14 +08:00
|
|
|
PHINode *PN;
|
2014-07-22 01:06:51 +08:00
|
|
|
for (BasicBlock::iterator BBI = SI->begin();
|
2004-11-30 15:47:34 +08:00
|
|
|
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
|
2004-11-30 08:29:14 +08:00
|
|
|
Value *BB1V = PN->getIncomingValueForBlock(BB1);
|
|
|
|
Value *BB2V = PN->getIncomingValueForBlock(BB2);
|
2010-12-13 09:47:07 +08:00
|
|
|
if (BB1V == BB2V) continue;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// These values do not agree. Insert a select instruction before NT
|
|
|
|
// that determines the right value.
|
|
|
|
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!SI)
|
2011-05-20 04:52:46 +08:00
|
|
|
SI = cast<SelectInst>
|
|
|
|
(Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
|
|
|
|
BB1V->getName()+"."+BB2V->getName()));
|
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Make the PHI node use the select for all incoming values for BB1/BB2
|
|
|
|
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
|
|
|
|
if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
|
|
|
|
PN->setIncomingValue(i, SI);
|
2004-11-30 08:29:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update any PHI nodes in our new successors.
|
2014-07-22 01:06:51 +08:00
|
|
|
for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
|
|
|
|
AddPredecessorToBlock(*SI, BIParent, BB1);
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2008-12-17 04:54:32 +08:00
|
|
|
EraseTerminatorInstAndDCECond(BI);
|
2004-11-30 08:29:14 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Given an unconditional branch that goes to BBEnd,
|
2012-09-21 06:37:36 +08:00
|
|
|
/// check whether BBEnd has only two predecessors and the other predecessor
|
|
|
|
/// ends with an unconditional branch. If it is true, sink any common code
|
|
|
|
/// in the two predecessors to BBEnd.
|
|
|
|
static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
|
|
|
|
assert(BI1->isUnconditional());
|
|
|
|
BasicBlock *BB1 = BI1->getParent();
|
|
|
|
BasicBlock *BBEnd = BI1->getSuccessor(0);
|
|
|
|
|
|
|
|
// Check that BBEnd has two predecessors and the other predecessor ends with
|
|
|
|
// an unconditional branch.
|
2012-10-01 05:03:56 +08:00
|
|
|
pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
|
|
|
|
BasicBlock *Pred0 = *PI++;
|
|
|
|
if (PI == PE) // Only one predecessor.
|
2012-09-21 06:37:36 +08:00
|
|
|
return false;
|
2012-10-01 05:03:56 +08:00
|
|
|
BasicBlock *Pred1 = *PI++;
|
|
|
|
if (PI != PE) // More than two predecessors.
|
|
|
|
return false;
|
|
|
|
BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
|
2012-09-21 06:37:36 +08:00
|
|
|
BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
|
|
|
|
if (!BI2 || !BI2->isUnconditional())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Gather the PHI nodes in BBEnd.
|
2014-12-23 16:26:55 +08:00
|
|
|
SmallDenseMap<std::pair<Value *, Value *>, PHINode *> JointValueMap;
|
2014-04-25 13:29:35 +08:00
|
|
|
Instruction *FirstNonPhiInBBEnd = nullptr;
|
2014-12-23 16:26:55 +08:00
|
|
|
for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) {
|
2012-09-21 06:37:36 +08:00
|
|
|
if (PHINode *PN = dyn_cast<PHINode>(I)) {
|
|
|
|
Value *BB1V = PN->getIncomingValueForBlock(BB1);
|
2012-11-16 02:40:29 +08:00
|
|
|
Value *BB2V = PN->getIncomingValueForBlock(BB2);
|
2014-12-23 16:26:55 +08:00
|
|
|
JointValueMap[std::make_pair(BB1V, BB2V)] = PN;
|
2012-09-21 06:37:36 +08:00
|
|
|
} else {
|
|
|
|
FirstNonPhiInBBEnd = &*I;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!FirstNonPhiInBBEnd)
|
|
|
|
return false;
|
2012-11-16 02:40:29 +08:00
|
|
|
|
2012-09-21 06:37:36 +08:00
|
|
|
// This does very trivial matching, with limited scanning, to find identical
|
|
|
|
// instructions in the two blocks. We scan backward for obviously identical
|
|
|
|
// instructions in an identical order.
|
|
|
|
BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
|
2014-12-23 16:26:55 +08:00
|
|
|
RE1 = BB1->getInstList().rend(),
|
|
|
|
RI2 = BB2->getInstList().rbegin(),
|
|
|
|
RE2 = BB2->getInstList().rend();
|
2012-09-21 06:37:36 +08:00
|
|
|
// Skip debug info.
|
|
|
|
while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
|
|
|
|
if (RI1 == RE1)
|
|
|
|
return false;
|
|
|
|
while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
|
|
|
|
if (RI2 == RE2)
|
|
|
|
return false;
|
|
|
|
// Skip the unconditional branches.
|
|
|
|
++RI1;
|
|
|
|
++RI2;
|
|
|
|
|
|
|
|
bool Changed = false;
|
|
|
|
while (RI1 != RE1 && RI2 != RE2) {
|
|
|
|
// Skip debug info.
|
|
|
|
while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
|
|
|
|
if (RI1 == RE1)
|
|
|
|
return Changed;
|
|
|
|
while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
|
|
|
|
if (RI2 == RE2)
|
|
|
|
return Changed;
|
|
|
|
|
|
|
|
Instruction *I1 = &*RI1, *I2 = &*RI2;
|
2014-12-23 16:26:55 +08:00
|
|
|
auto InstPair = std::make_pair(I1, I2);
|
2012-09-21 06:37:36 +08:00
|
|
|
// I1 and I2 should have a single use in the same PHI node, and they
|
|
|
|
// perform the same operation.
|
|
|
|
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
|
|
|
|
if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
|
|
|
|
isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
|
|
|
|
isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
|
|
|
|
isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
|
|
|
|
I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
|
|
|
|
I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
|
|
|
|
!I1->hasOneUse() || !I2->hasOneUse() ||
|
2014-12-23 16:26:55 +08:00
|
|
|
!JointValueMap.count(InstPair))
|
2012-09-21 06:37:36 +08:00
|
|
|
return Changed;
|
|
|
|
|
|
|
|
// Check whether we should swap the operands of ICmpInst.
|
2014-12-23 16:26:55 +08:00
|
|
|
// TODO: Add support of communativity.
|
2012-09-21 06:37:36 +08:00
|
|
|
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
|
|
|
|
bool SwapOpnds = false;
|
|
|
|
if (ICmp1 && ICmp2 &&
|
|
|
|
ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
|
|
|
|
ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
|
|
|
|
(ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
|
|
|
|
ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
|
|
|
|
ICmp2->swapOperands();
|
|
|
|
SwapOpnds = true;
|
|
|
|
}
|
|
|
|
if (!I1->isSameOperationAs(I2)) {
|
|
|
|
if (SwapOpnds)
|
|
|
|
ICmp2->swapOperands();
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The operands should be either the same or they need to be generated
|
|
|
|
// with a PHI node after sinking. We only handle the case where there is
|
|
|
|
// a single pair of different operands.
|
2014-04-25 13:29:35 +08:00
|
|
|
Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
|
2014-12-23 16:26:55 +08:00
|
|
|
unsigned Op1Idx = ~0U;
|
2012-09-21 06:37:36 +08:00
|
|
|
for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
|
|
|
|
if (I1->getOperand(I) == I2->getOperand(I))
|
|
|
|
continue;
|
2014-12-23 16:26:55 +08:00
|
|
|
// Early exit if we have more-than one pair of different operands or if
|
|
|
|
// we need a PHI node to replace a constant.
|
|
|
|
if (Op1Idx != ~0U ||
|
2012-09-21 06:37:36 +08:00
|
|
|
isa<Constant>(I1->getOperand(I)) ||
|
|
|
|
isa<Constant>(I2->getOperand(I))) {
|
|
|
|
// If we can't sink the instructions, undo the swapping.
|
|
|
|
if (SwapOpnds)
|
|
|
|
ICmp2->swapOperands();
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
DifferentOp1 = I1->getOperand(I);
|
|
|
|
Op1Idx = I;
|
|
|
|
DifferentOp2 = I2->getOperand(I);
|
|
|
|
}
|
|
|
|
|
2014-12-23 16:26:55 +08:00
|
|
|
DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n");
|
|
|
|
DEBUG(dbgs() << " " << *I2 << "\n");
|
|
|
|
|
|
|
|
// We insert the pair of different operands to JointValueMap and
|
|
|
|
// remove (I1, I2) from JointValueMap.
|
|
|
|
if (Op1Idx != ~0U) {
|
|
|
|
auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)];
|
|
|
|
if (!NewPN) {
|
|
|
|
NewPN =
|
|
|
|
PHINode::Create(DifferentOp1->getType(), 2,
|
|
|
|
DifferentOp1->getName() + ".sink", BBEnd->begin());
|
|
|
|
NewPN->addIncoming(DifferentOp1, BB1);
|
|
|
|
NewPN->addIncoming(DifferentOp2, BB2);
|
|
|
|
DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
|
|
|
|
}
|
2012-09-21 06:37:36 +08:00
|
|
|
// I1 should use NewPN instead of DifferentOp1.
|
|
|
|
I1->setOperand(Op1Idx, NewPN);
|
|
|
|
}
|
2014-12-23 16:26:55 +08:00
|
|
|
PHINode *OldPN = JointValueMap[InstPair];
|
|
|
|
JointValueMap.erase(InstPair);
|
2012-09-21 06:37:36 +08:00
|
|
|
|
|
|
|
// We need to update RE1 and RE2 if we are going to sink the first
|
|
|
|
// instruction in the basic block down.
|
|
|
|
bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
|
|
|
|
// Sink the instruction.
|
|
|
|
BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
|
|
|
|
if (!OldPN->use_empty())
|
|
|
|
OldPN->replaceAllUsesWith(I1);
|
|
|
|
OldPN->eraseFromParent();
|
|
|
|
|
|
|
|
if (!I2->use_empty())
|
|
|
|
I2->replaceAllUsesWith(I1);
|
|
|
|
I1->intersectOptionalDataWith(I2);
|
2014-10-23 00:37:13 +08:00
|
|
|
// TODO: Use combineMetadata here to preserve what metadata we can
|
|
|
|
// (analogous to the hoisting case above).
|
2012-09-21 06:37:36 +08:00
|
|
|
I2->eraseFromParent();
|
|
|
|
|
|
|
|
if (UpdateRE1)
|
|
|
|
RE1 = BB1->getInstList().rend();
|
|
|
|
if (UpdateRE2)
|
|
|
|
RE2 = BB2->getInstList().rend();
|
|
|
|
FirstNonPhiInBBEnd = I1;
|
|
|
|
NumSinkCommons++;
|
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
2013-04-30 05:28:24 +08:00
|
|
|
/// \brief Determine if we can hoist sink a sole store instruction out of a
|
|
|
|
/// conditional block.
|
|
|
|
///
|
|
|
|
/// We are looking for code like the following:
|
|
|
|
/// BrBB:
|
|
|
|
/// store i32 %add, i32* %arrayidx2
|
|
|
|
/// ... // No other stores or function calls (we could be calling a memory
|
|
|
|
/// ... // function).
|
|
|
|
/// %cmp = icmp ult %x, %y
|
|
|
|
/// br i1 %cmp, label %EndBB, label %ThenBB
|
|
|
|
/// ThenBB:
|
|
|
|
/// store i32 %add5, i32* %arrayidx2
|
|
|
|
/// br label EndBB
|
|
|
|
/// EndBB:
|
|
|
|
/// ...
|
|
|
|
/// We are going to transform this into:
|
|
|
|
/// BrBB:
|
|
|
|
/// store i32 %add, i32* %arrayidx2
|
|
|
|
/// ... //
|
|
|
|
/// %cmp = icmp ult %x, %y
|
|
|
|
/// %add.add5 = select i1 %cmp, i32 %add, %add5
|
|
|
|
/// store i32 %add.add5, i32* %arrayidx2
|
|
|
|
/// ...
|
|
|
|
///
|
|
|
|
/// \return The pointer to the value of the previous store if the store can be
|
|
|
|
/// hoisted into the predecessor block. 0 otherwise.
|
2013-05-24 00:09:15 +08:00
|
|
|
static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
|
|
|
|
BasicBlock *StoreBB, BasicBlock *EndBB) {
|
2013-04-30 05:28:24 +08:00
|
|
|
StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
|
|
|
|
if (!StoreToHoist)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2013-04-30 05:28:24 +08:00
|
|
|
|
|
|
|
// Volatile or atomic.
|
|
|
|
if (!StoreToHoist->isSimple())
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2013-04-30 05:28:24 +08:00
|
|
|
|
|
|
|
Value *StorePtr = StoreToHoist->getPointerOperand();
|
|
|
|
|
|
|
|
// Look for a store to the same pointer in BrBB.
|
|
|
|
unsigned MaxNumInstToLookAt = 10;
|
|
|
|
for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
|
|
|
|
RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
|
|
|
|
Instruction *CurI = &*RI;
|
|
|
|
|
|
|
|
// Could be calling an instruction that effects memory like free().
|
|
|
|
if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2013-04-30 05:28:24 +08:00
|
|
|
|
|
|
|
StoreInst *SI = dyn_cast<StoreInst>(CurI);
|
|
|
|
// Found the previous store make sure it stores to the same location.
|
|
|
|
if (SI && SI->getPointerOperand() == StorePtr)
|
|
|
|
// Found the previous store, return its value operand.
|
|
|
|
return SI->getValueOperand();
|
|
|
|
else if (SI)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr; // Unknown store.
|
2013-04-30 05:28:24 +08:00
|
|
|
}
|
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2013-04-30 05:28:24 +08:00
|
|
|
}
|
|
|
|
|
2013-01-24 16:05:06 +08:00
|
|
|
/// \brief Speculate a conditional basic block flattening the CFG.
|
2012-01-06 07:58:56 +08:00
|
|
|
///
|
2013-01-24 16:05:06 +08:00
|
|
|
/// Note that this is a very risky transform currently. Speculating
|
|
|
|
/// instructions like this is most often not desirable. Instead, there is an MI
|
|
|
|
/// pass which can do it with full awareness of the resource constraints.
|
|
|
|
/// However, some cases are "obvious" and we should do directly. An example of
|
|
|
|
/// this is speculating a single, reasonably cheap instruction.
|
|
|
|
///
|
|
|
|
/// There is only one distinct advantage to flattening the CFG at the IR level:
|
|
|
|
/// it makes very common but simplistic optimizations such as are common in
|
|
|
|
/// instcombine and the DAG combiner more powerful by removing CFG edges and
|
|
|
|
/// modeling their effects with easier to reason about SSA value graphs.
|
|
|
|
///
|
|
|
|
///
|
|
|
|
/// An illustration of this transform is turning this IR:
|
|
|
|
/// \code
|
|
|
|
/// BB:
|
|
|
|
/// %cmp = icmp ult %x, %y
|
|
|
|
/// br i1 %cmp, label %EndBB, label %ThenBB
|
|
|
|
/// ThenBB:
|
|
|
|
/// %sub = sub %x, %y
|
2012-01-06 07:58:56 +08:00
|
|
|
/// br label %EndBB
|
2013-01-24 16:05:06 +08:00
|
|
|
/// EndBB:
|
|
|
|
/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
|
|
|
|
/// ...
|
|
|
|
/// \endcode
|
|
|
|
///
|
|
|
|
/// Into this IR:
|
|
|
|
/// \code
|
|
|
|
/// BB:
|
|
|
|
/// %cmp = icmp ult %x, %y
|
|
|
|
/// %sub = sub %x, %y
|
|
|
|
/// %cond = select i1 %cmp, 0, %sub
|
|
|
|
/// ...
|
|
|
|
/// \endcode
|
|
|
|
///
|
|
|
|
/// \returns true if the conditional block is removed.
|
2014-07-10 22:41:31 +08:00
|
|
|
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
|
2015-02-11 20:15:41 +08:00
|
|
|
const TargetTransformInfo &TTI) {
|
2013-01-24 16:22:40 +08:00
|
|
|
// Be conservative for now. FP select instruction can often be expensive.
|
|
|
|
Value *BrCond = BI->getCondition();
|
|
|
|
if (isa<FCmpInst>(BrCond))
|
|
|
|
return false;
|
|
|
|
|
2013-01-24 17:59:39 +08:00
|
|
|
BasicBlock *BB = BI->getParent();
|
|
|
|
BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
|
|
|
|
|
|
|
|
// If ThenBB is actually on the false edge of the conditional branch, remember
|
|
|
|
// to swap the select operands later.
|
|
|
|
bool Invert = false;
|
|
|
|
if (ThenBB != BI->getSuccessor(0)) {
|
|
|
|
assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
|
|
|
|
Invert = true;
|
|
|
|
}
|
|
|
|
assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
|
|
|
|
|
2013-01-25 13:40:09 +08:00
|
|
|
// Keep a count of how many times instructions are used within CondBB when
|
|
|
|
// they are candidates for sinking into CondBB. Specifically:
|
|
|
|
// - They are defined in BB, and
|
|
|
|
// - They have no side effects, and
|
|
|
|
// - All of their uses are in CondBB.
|
|
|
|
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
|
|
|
|
|
2013-01-24 19:52:58 +08:00
|
|
|
unsigned SpeculationCost = 0;
|
2014-04-25 13:29:35 +08:00
|
|
|
Value *SpeculatedStoreValue = nullptr;
|
|
|
|
StoreInst *SpeculatedStore = nullptr;
|
2013-01-24 19:52:58 +08:00
|
|
|
for (BasicBlock::iterator BBI = ThenBB->begin(),
|
2014-03-02 20:27:27 +08:00
|
|
|
BBE = std::prev(ThenBB->end());
|
2009-03-06 14:00:17 +08:00
|
|
|
BBI != BBE; ++BBI) {
|
|
|
|
Instruction *I = BBI;
|
|
|
|
// Skip debug info.
|
2013-01-24 19:52:58 +08:00
|
|
|
if (isa<DbgInfoIntrinsic>(I))
|
|
|
|
continue;
|
2009-03-06 14:00:17 +08:00
|
|
|
|
2015-04-13 02:18:51 +08:00
|
|
|
// Only speculatively execute a single instruction (not counting the
|
2013-01-24 19:52:58 +08:00
|
|
|
// terminator) for now.
|
2013-01-27 14:42:03 +08:00
|
|
|
++SpeculationCost;
|
|
|
|
if (SpeculationCost > 1)
|
2009-03-06 14:00:17 +08:00
|
|
|
return false;
|
2012-01-06 07:58:56 +08:00
|
|
|
|
|
|
|
// Don't hoist the instruction if it's unsafe or expensive.
|
2015-03-10 10:37:25 +08:00
|
|
|
if (!isSafeToSpeculativelyExecute(I) &&
|
|
|
|
!(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
|
|
|
|
I, BB, ThenBB, EndBB))))
|
2012-01-06 07:58:56 +08:00
|
|
|
return false;
|
2013-04-30 05:28:24 +08:00
|
|
|
if (!SpeculatedStoreValue &&
|
2015-03-10 10:37:25 +08:00
|
|
|
ComputeSpeculationCost(I, TTI) >
|
|
|
|
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
|
2012-01-06 07:58:56 +08:00
|
|
|
return false;
|
|
|
|
|
2013-04-30 05:28:24 +08:00
|
|
|
// Store the store speculation candidate.
|
|
|
|
if (SpeculatedStoreValue)
|
|
|
|
SpeculatedStore = cast<StoreInst>(I);
|
|
|
|
|
2012-01-06 07:58:56 +08:00
|
|
|
// Do not hoist the instruction if any of its operands are defined but not
|
2013-04-30 05:28:24 +08:00
|
|
|
// used in BB. The transformation will prevent the operand from
|
2012-01-06 07:58:56 +08:00
|
|
|
// being sunk into the use block.
|
2013-01-24 19:52:58 +08:00
|
|
|
for (User::op_iterator i = I->op_begin(), e = I->op_end();
|
2012-01-06 07:58:56 +08:00
|
|
|
i != e; ++i) {
|
|
|
|
Instruction *OpI = dyn_cast<Instruction>(*i);
|
2013-01-25 13:40:09 +08:00
|
|
|
if (!OpI || OpI->getParent() != BB ||
|
|
|
|
OpI->mayHaveSideEffects())
|
|
|
|
continue; // Not a candidate for sinking.
|
|
|
|
|
|
|
|
++SinkCandidateUseCounts[OpI];
|
2012-01-06 07:58:56 +08:00
|
|
|
}
|
|
|
|
}
|
2008-06-07 16:52:29 +08:00
|
|
|
|
2013-01-25 13:40:09 +08:00
|
|
|
// Consider any sink candidates which are only used in CondBB as costs for
|
|
|
|
// speculation. Note, while we iterate over a DenseMap here, we are summing
|
|
|
|
// and so iteration order isn't significant.
|
|
|
|
for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
|
|
|
|
SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
|
|
|
|
I != E; ++I)
|
|
|
|
if (I->first->getNumUses() == I->second) {
|
2013-01-27 14:42:03 +08:00
|
|
|
++SpeculationCost;
|
|
|
|
if (SpeculationCost > 1)
|
2013-01-25 13:40:09 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-01-24 18:40:51 +08:00
|
|
|
// Check that the PHI nodes can be converted to selects.
|
|
|
|
bool HaveRewritablePHIs = false;
|
2013-01-24 17:59:39 +08:00
|
|
|
for (BasicBlock::iterator I = EndBB->begin();
|
2012-01-06 07:58:56 +08:00
|
|
|
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
|
2013-01-24 17:59:39 +08:00
|
|
|
Value *OrigV = PN->getIncomingValueForBlock(BB);
|
|
|
|
Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
|
2012-01-06 07:58:56 +08:00
|
|
|
|
2013-06-04 22:11:59 +08:00
|
|
|
// FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
|
2012-01-06 07:58:56 +08:00
|
|
|
// Skip PHIs which are trivial.
|
2013-01-24 17:59:39 +08:00
|
|
|
if (ThenV == OrigV)
|
2012-01-06 07:58:56 +08:00
|
|
|
continue;
|
|
|
|
|
SimplifyCFG: Don't convert phis into selects if we could remove undef behavior
instead
We used to transform this:
define void @test6(i1 %cond, i8* %ptr) {
entry:
br i1 %cond, label %bb1, label %bb2
bb1:
br label %bb2
bb2:
%ptr.2 = phi i8* [ %ptr, %entry ], [ null, %bb1 ]
store i8 2, i8* %ptr.2, align 8
ret void
}
into this:
define void @test6(i1 %cond, i8* %ptr) {
%ptr.2 = select i1 %cond, i8* null, i8* %ptr
store i8 2, i8* %ptr.2, align 8
ret void
}
because the SimplifyCFG transformation into selects happened to run before
the SimplifyCFG transformation that removes unreachable control flow
(we have 'unreachable control flow' here because the store to null is
undefined behavior).
The existing transformation that removes unreachable control flow in simplifycfg
is:
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB)
Now we generate:
define void @test6(i1 %cond, i8* %ptr) {
store i8 2, i8* %ptr, align 8
ret void
}
I did not see any impact on the test-suite + externals.
rdar://18596215
llvm-svn: 219462
2014-10-10 09:27:02 +08:00
|
|
|
// Don't convert to selects if we could remove undefined behavior instead.
|
|
|
|
if (passingValueIsAlwaysUndefined(OrigV, PN) ||
|
|
|
|
passingValueIsAlwaysUndefined(ThenV, PN))
|
|
|
|
return false;
|
|
|
|
|
2013-01-24 18:40:51 +08:00
|
|
|
HaveRewritablePHIs = true;
|
2013-06-04 22:11:59 +08:00
|
|
|
ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
|
|
|
|
ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
|
|
|
|
if (!OrigCE && !ThenCE)
|
2013-01-24 19:53:01 +08:00
|
|
|
continue; // Known safe and cheap.
|
|
|
|
|
2015-03-10 10:37:25 +08:00
|
|
|
if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
|
|
|
|
(OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
|
2013-01-24 19:53:01 +08:00
|
|
|
return false;
|
2015-03-10 10:37:25 +08:00
|
|
|
unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
|
|
|
|
unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
|
2015-02-11 20:15:41 +08:00
|
|
|
unsigned MaxCost = 2 * PHINodeFoldingThreshold *
|
|
|
|
TargetTransformInfo::TCC_Basic;
|
|
|
|
if (OrigCost + ThenCost > MaxCost)
|
2013-01-24 19:53:01 +08:00
|
|
|
return false;
|
2012-01-06 07:58:56 +08:00
|
|
|
|
2013-01-24 20:05:17 +08:00
|
|
|
// Account for the cost of an unfolded ConstantExpr which could end up
|
|
|
|
// getting expanded into Instructions.
|
|
|
|
// FIXME: This doesn't account for how many operations are combined in the
|
2013-01-27 14:42:03 +08:00
|
|
|
// constant expression.
|
|
|
|
++SpeculationCost;
|
|
|
|
if (SpeculationCost > 1)
|
2013-01-24 18:40:51 +08:00
|
|
|
return false;
|
2009-01-19 08:36:37 +08:00
|
|
|
}
|
2008-06-07 16:52:29 +08:00
|
|
|
|
2012-01-06 07:58:56 +08:00
|
|
|
// If there are no PHIs to process, bail early. This helps ensure idempotence
|
|
|
|
// as well.
|
2013-04-30 05:28:24 +08:00
|
|
|
if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
|
2012-01-06 07:58:56 +08:00
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2012-01-06 07:58:56 +08:00
|
|
|
// If we get here, we can hoist the instruction and if-convert.
|
2013-01-24 17:59:39 +08:00
|
|
|
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
|
2008-06-13 05:15:59 +08:00
|
|
|
|
2013-04-30 05:28:24 +08:00
|
|
|
// Insert a select of the value of the speculated store.
|
|
|
|
if (SpeculatedStoreValue) {
|
|
|
|
IRBuilder<true, NoFolder> Builder(BI);
|
|
|
|
Value *TrueV = SpeculatedStore->getValueOperand();
|
|
|
|
Value *FalseV = SpeculatedStoreValue;
|
|
|
|
if (Invert)
|
|
|
|
std::swap(TrueV, FalseV);
|
|
|
|
Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
|
|
|
|
"." + FalseV->getName());
|
|
|
|
SpeculatedStore->setOperand(0, S);
|
|
|
|
}
|
|
|
|
|
2013-01-24 19:52:58 +08:00
|
|
|
// Hoist the instructions.
|
|
|
|
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
|
2014-03-02 20:27:27 +08:00
|
|
|
std::prev(ThenBB->end()));
|
2008-06-07 16:52:29 +08:00
|
|
|
|
2012-01-06 07:58:56 +08:00
|
|
|
// Insert selects and rewrite the PHI operands.
|
2011-05-20 04:52:46 +08:00
|
|
|
IRBuilder<true, NoFolder> Builder(BI);
|
2013-01-24 18:40:51 +08:00
|
|
|
for (BasicBlock::iterator I = EndBB->begin();
|
|
|
|
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
|
|
|
|
unsigned OrigI = PN->getBasicBlockIndex(BB);
|
|
|
|
unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
|
|
|
|
Value *OrigV = PN->getIncomingValue(OrigI);
|
|
|
|
Value *ThenV = PN->getIncomingValue(ThenI);
|
|
|
|
|
|
|
|
// Skip PHIs which are trivial.
|
|
|
|
if (OrigV == ThenV)
|
|
|
|
continue;
|
2012-01-06 07:58:56 +08:00
|
|
|
|
|
|
|
// Create a select whose true value is the speculatively executed value and
|
2013-01-24 18:40:51 +08:00
|
|
|
// false value is the preexisting value. Swap them if the branch
|
|
|
|
// destinations were inverted.
|
|
|
|
Value *TrueV = ThenV, *FalseV = OrigV;
|
2012-01-06 07:58:56 +08:00
|
|
|
if (Invert)
|
2013-01-24 18:40:51 +08:00
|
|
|
std::swap(TrueV, FalseV);
|
|
|
|
Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV,
|
|
|
|
TrueV->getName() + "." + FalseV->getName());
|
|
|
|
PN->setIncomingValue(OrigI, V);
|
|
|
|
PN->setIncomingValue(ThenI, V);
|
2008-06-07 16:52:29 +08:00
|
|
|
}
|
|
|
|
|
2008-06-13 05:15:59 +08:00
|
|
|
++NumSpeculations;
|
2008-06-07 16:52:29 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-10-22 04:07:30 +08:00
|
|
|
/// \returns True if this block contains a CallInst with the NoDuplicate
|
|
|
|
/// attribute.
|
|
|
|
static bool HasNoDuplicateCall(const BasicBlock *BB) {
|
|
|
|
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
|
|
|
const CallInst *CI = dyn_cast<CallInst>(I);
|
|
|
|
if (!CI)
|
|
|
|
continue;
|
|
|
|
if (CI->cannotDuplicate())
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if we can thread a branch across this block.
|
2005-09-20 08:43:16 +08:00
|
|
|
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
|
|
|
|
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
|
2005-09-20 09:48:40 +08:00
|
|
|
unsigned Size = 0;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2009-03-11 02:00:05 +08:00
|
|
|
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
|
2009-03-13 07:18:09 +08:00
|
|
|
if (isa<DbgInfoIntrinsic>(BBI))
|
|
|
|
continue;
|
2005-09-20 09:48:40 +08:00
|
|
|
if (Size > 10) return false; // Don't clone large BB's.
|
2009-03-13 07:18:09 +08:00
|
|
|
++Size;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2009-03-13 07:18:09 +08:00
|
|
|
// We can only support instructions that do not define values that are
|
2005-09-20 09:48:40 +08:00
|
|
|
// live outside of the current basic block.
|
2014-03-09 11:16:01 +08:00
|
|
|
for (User *U : BBI->users()) {
|
|
|
|
Instruction *UI = cast<Instruction>(U);
|
|
|
|
if (UI->getParent() != BB || isa<PHINode>(UI)) return false;
|
2005-09-20 09:48:40 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-09-20 08:43:16 +08:00
|
|
|
// Looks ok, continue checking.
|
|
|
|
}
|
2005-09-20 09:48:40 +08:00
|
|
|
|
2005-09-20 08:43:16 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If we have a conditional branch on a PHI node value that is defined in the
|
|
|
|
/// same block as the branch and if any PHI entries are constants, thread edges
|
|
|
|
/// corresponding to that entry to be branches to their ultimate destination.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
|
2005-09-20 07:49:37 +08:00
|
|
|
BasicBlock *BB = BI->getParent();
|
|
|
|
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
|
2005-09-20 07:57:04 +08:00
|
|
|
// NOTE: we currently cannot transform this case if the PHI node is used
|
|
|
|
// outside of the block.
|
2005-09-20 08:43:16 +08:00
|
|
|
if (!PN || PN->getParent() != BB || !PN->hasOneUse())
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-09-20 07:49:37 +08:00
|
|
|
// Degenerate case of a single entry PHI.
|
|
|
|
if (PN->getNumIncomingValues() == 1) {
|
2008-12-04 03:44:02 +08:00
|
|
|
FoldSingleEntryPHINodes(PN->getParent());
|
2012-08-30 05:46:36 +08:00
|
|
|
return true;
|
2005-09-20 07:49:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Now we know that this block has multiple preds and two succs.
|
2005-09-20 08:43:16 +08:00
|
|
|
if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2013-10-22 04:07:30 +08:00
|
|
|
if (HasNoDuplicateCall(BB)) return false;
|
|
|
|
|
2005-09-20 07:49:37 +08:00
|
|
|
// Okay, this is a simple enough basic block. See if any phi values are
|
|
|
|
// constants.
|
2007-01-11 20:24:14 +08:00
|
|
|
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
|
2010-12-13 09:47:07 +08:00
|
|
|
ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!CB || !CB->getType()->isIntegerTy(1)) continue;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Okay, we now know that all edges from PredBB should be revectored to
|
|
|
|
// branch to RealDest.
|
|
|
|
BasicBlock *PredBB = PN->getIncomingBlock(i);
|
|
|
|
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
if (RealDest == BB) continue; // Skip self loops.
|
2011-06-04 17:42:04 +08:00
|
|
|
// Skip if the predecessor's terminator is an indirect branch.
|
|
|
|
if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// The dest block might have PHI nodes, other predecessors and other
|
|
|
|
// difficult cases. Instead of being smart about this, just insert a new
|
|
|
|
// block that jumps to the destination block, effectively splitting
|
|
|
|
// the edge we are about to create.
|
|
|
|
BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
|
|
|
|
RealDest->getName()+".critedge",
|
|
|
|
RealDest->getParent(), RealDest);
|
|
|
|
BranchInst::Create(RealDest, EdgeBB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 15:09:42 +08:00
|
|
|
// Update PHI nodes.
|
|
|
|
AddPredecessorToBlock(RealDest, EdgeBB, BB);
|
2010-12-13 09:47:07 +08:00
|
|
|
|
|
|
|
// BB may have instructions that are being threaded over. Clone these
|
|
|
|
// instructions into EdgeBB. We know that there will be no uses of the
|
|
|
|
// cloned instructions outside of EdgeBB.
|
|
|
|
BasicBlock::iterator InsertPt = EdgeBB->begin();
|
|
|
|
DenseMap<Value*, Value*> TranslateMap; // Track translated values.
|
|
|
|
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
|
|
|
|
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
|
|
|
|
TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Clone the instruction.
|
|
|
|
Instruction *N = BBI->clone();
|
|
|
|
if (BBI->hasName()) N->setName(BBI->getName()+".c");
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Update operands due to translation.
|
|
|
|
for (User::op_iterator i = N->op_begin(), e = N->op_end();
|
|
|
|
i != e; ++i) {
|
|
|
|
DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
|
|
|
|
if (PI != TranslateMap.end())
|
|
|
|
*i = PI->second;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Check for trivial simplification.
|
2014-02-21 08:06:31 +08:00
|
|
|
if (Value *V = SimplifyInstruction(N, DL)) {
|
2010-12-14 14:17:25 +08:00
|
|
|
TranslateMap[BBI] = V;
|
|
|
|
delete N; // Instruction folded away, don't need actual inst
|
2010-12-13 09:47:07 +08:00
|
|
|
} else {
|
|
|
|
// Insert the new instruction into its new home.
|
|
|
|
EdgeBB->getInstList().insert(InsertPt, N);
|
|
|
|
if (!BBI->use_empty())
|
|
|
|
TranslateMap[BBI] = N;
|
2005-09-20 09:48:40 +08:00
|
|
|
}
|
2010-12-13 09:47:07 +08:00
|
|
|
}
|
2005-09-20 09:48:40 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Loop over all of the edges from PredBB to BB, changing them to branch
|
|
|
|
// to EdgeBB instead.
|
|
|
|
TerminatorInst *PredBBTI = PredBB->getTerminator();
|
|
|
|
for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
|
|
|
|
if (PredBBTI->getSuccessor(i) == BB) {
|
|
|
|
BB->removePredecessor(PredBB);
|
|
|
|
PredBBTI->setSuccessor(i, EdgeBB);
|
2005-09-20 09:48:40 +08:00
|
|
|
}
|
2011-06-04 17:42:04 +08:00
|
|
|
|
2010-12-13 09:47:07 +08:00
|
|
|
// Recurse, simplifying any other constants.
|
2014-02-21 08:06:31 +08:00
|
|
|
return FoldCondBranchOnPHI(BI, DL) | true;
|
2007-01-11 20:24:14 +08:00
|
|
|
}
|
2005-09-20 07:49:37 +08:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Given a BB that starts with the specified two-entry PHI node,
|
|
|
|
/// see if we can eliminate it.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
|
|
|
|
const DataLayout &DL) {
|
2005-09-23 14:39:30 +08:00
|
|
|
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
|
|
|
|
// statement", which has a very simple dominance structure. Basically, we
|
|
|
|
// are trying to find the condition that is being branched on, which
|
|
|
|
// subsequently causes this merge to happen. We really want control
|
|
|
|
// dependence information for this check, but simplifycfg can't keep it up
|
|
|
|
// to date, and this catches most of the cases we care about anyway.
|
|
|
|
BasicBlock *BB = PN->getParent();
|
|
|
|
BasicBlock *IfTrue, *IfFalse;
|
|
|
|
Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
|
2010-12-14 16:01:53 +08:00
|
|
|
if (!IfCond ||
|
|
|
|
// Don't bother if the branch will be constant folded trivially.
|
|
|
|
isa<ConstantInt>(IfCond))
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2006-11-19 03:19:36 +08:00
|
|
|
// Okay, we found that we can merge this two-entry phi node into a select.
|
|
|
|
// Doing so would require us to fold *all* two entry phi nodes in this block.
|
|
|
|
// At some point this becomes non-profitable (particularly if the target
|
|
|
|
// doesn't support cmov's). Only do this transformation if there are two or
|
|
|
|
// fewer PHI nodes in this block.
|
|
|
|
unsigned NumPhis = 0;
|
|
|
|
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
|
|
|
|
if (NumPhis > 2)
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-09-23 14:39:30 +08:00
|
|
|
// Loop over the PHI's seeing if we can promote them all to select
|
|
|
|
// instructions. While we are at it, keep track of the instructions
|
|
|
|
// that need to be moved to the dominating block.
|
2010-12-14 15:41:39 +08:00
|
|
|
SmallPtrSet<Instruction*, 4> AggressiveInsts;
|
2011-04-30 02:47:38 +08:00
|
|
|
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
|
|
|
|
MaxCostVal1 = PHINodeFoldingThreshold;
|
2015-02-11 20:15:41 +08:00
|
|
|
MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
|
|
|
|
MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 16:46:09 +08:00
|
|
|
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
|
|
|
|
PHINode *PN = cast<PHINode>(II++);
|
2014-02-21 08:06:31 +08:00
|
|
|
if (Value *V = SimplifyInstruction(PN, DL)) {
|
2010-12-14 15:20:29 +08:00
|
|
|
PN->replaceAllUsesWith(V);
|
2010-12-14 16:46:09 +08:00
|
|
|
PN->eraseFromParent();
|
2010-12-14 15:20:29 +08:00
|
|
|
continue;
|
2005-09-23 14:39:30 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2011-04-30 02:47:31 +08:00
|
|
|
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
|
2015-03-10 10:37:25 +08:00
|
|
|
MaxCostVal0, TTI) ||
|
2011-04-30 02:47:31 +08:00
|
|
|
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
|
2015-03-10 10:37:25 +08:00
|
|
|
MaxCostVal1, TTI))
|
2010-12-14 15:20:29 +08:00
|
|
|
return false;
|
2005-09-23 14:39:30 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2012-07-23 16:51:15 +08:00
|
|
|
// If we folded the first phi, PN dangles at this point. Refresh it. If
|
2010-12-14 15:41:39 +08:00
|
|
|
// we ran out of PHIs then we simplified them all.
|
|
|
|
PN = dyn_cast<PHINode>(BB->begin());
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!PN) return true;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 16:46:09 +08:00
|
|
|
// Don't fold i1 branches on PHIs which contain binary operators. These can
|
|
|
|
// often be turned into switches and other things.
|
|
|
|
if (PN->getType()->isIntegerTy(1) &&
|
|
|
|
(isa<BinaryOperator>(PN->getIncomingValue(0)) ||
|
|
|
|
isa<BinaryOperator>(PN->getIncomingValue(1)) ||
|
|
|
|
isa<BinaryOperator>(IfCond)))
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-09-23 14:39:30 +08:00
|
|
|
// If we all PHI nodes are promotable, check to make sure that all
|
|
|
|
// instructions in the predecessor blocks can be promoted as well. If
|
|
|
|
// not, we won't be able to get rid of the control flow, so it's not
|
|
|
|
// worth promoting to select instructions.
|
2014-04-25 13:29:35 +08:00
|
|
|
BasicBlock *DomBlock = nullptr;
|
2010-12-14 15:41:39 +08:00
|
|
|
BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
|
|
|
|
BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
|
|
|
|
if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
|
2014-04-25 13:29:35 +08:00
|
|
|
IfBlock1 = nullptr;
|
2010-12-14 15:41:39 +08:00
|
|
|
} else {
|
|
|
|
DomBlock = *pred_begin(IfBlock1);
|
|
|
|
for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
|
2009-02-04 06:12:02 +08:00
|
|
|
if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
|
2005-09-23 14:39:30 +08:00
|
|
|
// This is not an aggressive instruction that we can promote.
|
|
|
|
// Because of this, we won't be able to get rid of the control
|
|
|
|
// flow, so the xform is not worth it.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 15:41:39 +08:00
|
|
|
if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
|
2014-04-25 13:29:35 +08:00
|
|
|
IfBlock2 = nullptr;
|
2010-12-14 15:41:39 +08:00
|
|
|
} else {
|
|
|
|
DomBlock = *pred_begin(IfBlock2);
|
|
|
|
for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
|
2009-02-04 06:12:02 +08:00
|
|
|
if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
|
2005-09-23 14:39:30 +08:00
|
|
|
// This is not an aggressive instruction that we can promote.
|
|
|
|
// Because of this, we won't be able to get rid of the control
|
|
|
|
// flow, so the xform is not worth it.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 15:23:10 +08:00
|
|
|
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
|
2010-12-14 15:41:39 +08:00
|
|
|
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-09-23 14:39:30 +08:00
|
|
|
// If we can still promote the PHI nodes after this gauntlet of tests,
|
|
|
|
// do all of the PHI's now.
|
2010-12-14 16:46:09 +08:00
|
|
|
Instruction *InsertPt = DomBlock->getTerminator();
|
2011-05-20 04:52:46 +08:00
|
|
|
IRBuilder<true, NoFolder> Builder(InsertPt);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-09-23 14:39:30 +08:00
|
|
|
// Move all 'aggressive' instructions, which are defined in the
|
|
|
|
// conditional parts of the if's up to the dominating block.
|
2010-12-13 09:47:07 +08:00
|
|
|
if (IfBlock1)
|
2010-12-14 16:46:09 +08:00
|
|
|
DomBlock->getInstList().splice(InsertPt,
|
2010-12-13 09:47:07 +08:00
|
|
|
IfBlock1->getInstList(), IfBlock1->begin(),
|
2005-09-23 14:39:30 +08:00
|
|
|
IfBlock1->getTerminator());
|
2010-12-13 09:47:07 +08:00
|
|
|
if (IfBlock2)
|
2010-12-14 16:46:09 +08:00
|
|
|
DomBlock->getInstList().splice(InsertPt,
|
2010-12-13 09:47:07 +08:00
|
|
|
IfBlock2->getInstList(), IfBlock2->begin(),
|
2005-09-23 14:39:30 +08:00
|
|
|
IfBlock2->getTerminator());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2005-09-23 14:39:30 +08:00
|
|
|
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
|
|
|
|
// Change the PHI node into a select instruction.
|
2010-12-13 09:47:07 +08:00
|
|
|
Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
|
|
|
|
Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
|
|
|
SelectInst *NV =
|
2011-05-19 02:16:44 +08:00
|
|
|
cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
|
2007-02-11 09:37:51 +08:00
|
|
|
PN->replaceAllUsesWith(NV);
|
|
|
|
NV->takeName(PN);
|
2010-12-14 14:17:25 +08:00
|
|
|
PN->eraseFromParent();
|
2005-09-23 14:39:30 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 16:01:53 +08:00
|
|
|
// At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
|
|
|
|
// has been flattened. Change DomBlock to jump directly to our new block to
|
|
|
|
// avoid other simplifycfg's kicking in on the diamond.
|
|
|
|
TerminatorInst *OldTI = DomBlock->getTerminator();
|
2011-05-19 02:16:44 +08:00
|
|
|
Builder.SetInsertPoint(OldTI);
|
|
|
|
Builder.CreateBr(BB);
|
2010-12-14 16:01:53 +08:00
|
|
|
OldTI->eraseFromParent();
|
2005-09-23 14:39:30 +08:00
|
|
|
return true;
|
|
|
|
}
|
2005-09-20 07:49:37 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If we found a conditional branch that goes to two returning blocks,
|
|
|
|
/// try to merge them together into one return,
|
2008-04-24 08:01:19 +08:00
|
|
|
/// introducing a select if the return values disagree.
|
2012-08-30 05:46:36 +08:00
|
|
|
static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
|
2011-05-19 05:33:11 +08:00
|
|
|
IRBuilder<> &Builder) {
|
2008-04-24 08:01:19 +08:00
|
|
|
assert(BI->isConditional() && "Must be a conditional branch");
|
|
|
|
BasicBlock *TrueSucc = BI->getSuccessor(0);
|
|
|
|
BasicBlock *FalseSucc = BI->getSuccessor(1);
|
|
|
|
ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
|
|
|
|
ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-04-24 08:01:19 +08:00
|
|
|
// Check to ensure both blocks are empty (just a return) or optionally empty
|
|
|
|
// with PHI nodes. If there are other instructions, merging would cause extra
|
|
|
|
// computation on one path or the other.
|
2010-12-13 09:47:07 +08:00
|
|
|
if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
|
2009-02-05 08:30:42 +08:00
|
|
|
return false;
|
2010-12-13 09:47:07 +08:00
|
|
|
if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
|
2009-02-05 08:30:42 +08:00
|
|
|
return false;
|
2008-04-24 08:01:19 +08:00
|
|
|
|
2011-05-19 05:33:11 +08:00
|
|
|
Builder.SetInsertPoint(BI);
|
2008-04-24 08:01:19 +08:00
|
|
|
// Okay, we found a branch that is going to two return nodes. If
|
|
|
|
// there is no return value for this function, just change the
|
|
|
|
// branch into a return.
|
|
|
|
if (FalseRet->getNumOperands() == 0) {
|
|
|
|
TrueSucc->removePredecessor(BI->getParent());
|
|
|
|
FalseSucc->removePredecessor(BI->getParent());
|
2011-05-19 05:33:11 +08:00
|
|
|
Builder.CreateRetVoid();
|
2008-12-17 04:54:32 +08:00
|
|
|
EraseTerminatorInstAndDCECond(BI);
|
2008-04-24 08:01:19 +08:00
|
|
|
return true;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-23 08:34:11 +08:00
|
|
|
// Otherwise, figure out what the true and false return values are
|
|
|
|
// so we can insert a new select instruction.
|
|
|
|
Value *TrueValue = TrueRet->getReturnValue();
|
|
|
|
Value *FalseValue = FalseRet->getReturnValue();
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-23 08:34:11 +08:00
|
|
|
// Unwrap any PHI nodes in the return blocks.
|
|
|
|
if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
|
|
|
|
if (TVPN->getParent() == TrueSucc)
|
|
|
|
TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
|
|
|
|
if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
|
|
|
|
if (FVPN->getParent() == FalseSucc)
|
|
|
|
FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-23 08:34:11 +08:00
|
|
|
// In order for this transformation to be safe, we must be able to
|
|
|
|
// unconditionally execute both operands to the return. This is
|
|
|
|
// normally the case, but we could have a potentially-trapping
|
|
|
|
// constant expression that prevents this transformation from being
|
|
|
|
// safe.
|
|
|
|
if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
|
|
|
|
if (TCV->canTrap())
|
|
|
|
return false;
|
|
|
|
if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
|
|
|
|
if (FCV->canTrap())
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-04-24 08:01:19 +08:00
|
|
|
// Okay, we collected all the mapped values and checked them for sanity, and
|
|
|
|
// defined to really do this transformation. First, update the CFG.
|
|
|
|
TrueSucc->removePredecessor(BI->getParent());
|
|
|
|
FalseSucc->removePredecessor(BI->getParent());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-04-24 08:01:19 +08:00
|
|
|
// Insert select instructions where needed.
|
|
|
|
Value *BrCond = BI->getCondition();
|
2008-07-23 08:34:11 +08:00
|
|
|
if (TrueValue) {
|
2008-04-24 08:01:19 +08:00
|
|
|
// Insert a select if the results differ.
|
2008-07-23 08:34:11 +08:00
|
|
|
if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
|
|
|
|
} else if (isa<UndefValue>(TrueValue)) {
|
|
|
|
TrueValue = FalseValue;
|
|
|
|
} else {
|
2011-05-19 05:33:11 +08:00
|
|
|
TrueValue = Builder.CreateSelect(BrCond, TrueValue,
|
|
|
|
FalseValue, "retval");
|
2008-04-24 08:01:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-30 05:46:36 +08:00
|
|
|
Value *RI = !TrueValue ?
|
2011-05-19 05:33:11 +08:00
|
|
|
Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
|
|
|
|
|
2009-08-23 18:29:55 +08:00
|
|
|
(void) RI;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-01-05 09:26:52 +08:00
|
|
|
DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
|
2009-08-23 12:37:46 +08:00
|
|
|
<< "\n " << *BI << "NewRet = " << *RI
|
|
|
|
<< "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-12-17 04:54:32 +08:00
|
|
|
EraseTerminatorInstAndDCECond(BI);
|
|
|
|
|
2008-04-24 08:01:19 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Given a conditional BranchInstruction, retrieve the probabilities of the
|
|
|
|
/// branch taking each edge. Fills in the two uint64_t parameters and returns true,
|
|
|
|
/// or returns false if no or invalid metadata was found.
|
2014-12-10 01:32:12 +08:00
|
|
|
static bool ExtractBranchMetadata(BranchInst *BI,
|
|
|
|
uint64_t &ProbTrue, uint64_t &ProbFalse) {
|
|
|
|
assert(BI->isConditional() &&
|
|
|
|
"Looking for probabilities on unconditional branch?");
|
|
|
|
MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
|
|
|
|
if (!ProfileData || ProfileData->getNumOperands() != 3) return false;
|
IR: Split Metadata from Value
Split `Metadata` away from the `Value` class hierarchy, as part of
PR21532. Assembly and bitcode changes are in the wings, but this is the
bulk of the change for the IR C++ API.
I have a follow-up patch prepared for `clang`. If this breaks other
sub-projects, I apologize in advance :(. Help me compile it on Darwin
I'll try to fix it. FWIW, the errors should be easy to fix, so it may
be simpler to just fix it yourself.
This breaks the build for all metadata-related code that's out-of-tree.
Rest assured the transition is mechanical and the compiler should catch
almost all of the problems.
Here's a quick guide for updating your code:
- `Metadata` is the root of a class hierarchy with three main classes:
`MDNode`, `MDString`, and `ValueAsMetadata`. It is distinct from
the `Value` class hierarchy. It is typeless -- i.e., instances do
*not* have a `Type`.
- `MDNode`'s operands are all `Metadata *` (instead of `Value *`).
- `TrackingVH<MDNode>` and `WeakVH` referring to metadata can be
replaced with `TrackingMDNodeRef` and `TrackingMDRef`, respectively.
If you're referring solely to resolved `MDNode`s -- post graph
construction -- just use `MDNode*`.
- `MDNode` (and the rest of `Metadata`) have only limited support for
`replaceAllUsesWith()`.
As long as an `MDNode` is pointing at a forward declaration -- the
result of `MDNode::getTemporary()` -- it maintains a side map of its
uses and can RAUW itself. Once the forward declarations are fully
resolved RAUW support is dropped on the ground. This means that
uniquing collisions on changing operands cause nodes to become
"distinct". (This already happened fairly commonly, whenever an
operand went to null.)
If you're constructing complex (non self-reference) `MDNode` cycles,
you need to call `MDNode::resolveCycles()` on each node (or on a
top-level node that somehow references all of the nodes). Also,
don't do that. Metadata cycles (and the RAUW machinery needed to
construct them) are expensive.
- An `MDNode` can only refer to a `Constant` through a bridge called
`ConstantAsMetadata` (one of the subclasses of `ValueAsMetadata`).
As a side effect, accessing an operand of an `MDNode` that is known
to be, e.g., `ConstantInt`, takes three steps: first, cast from
`Metadata` to `ConstantAsMetadata`; second, extract the `Constant`;
third, cast down to `ConstantInt`.
The eventual goal is to introduce `MDInt`/`MDFloat`/etc. and have
metadata schema owners transition away from using `Constant`s when
the type isn't important (and they don't care about referring to
`GlobalValue`s).
In the meantime, I've added transitional API to the `mdconst`
namespace that matches semantics with the old code, in order to
avoid adding the error-prone three-step equivalent to every call
site. If your old code was:
MDNode *N = foo();
bar(isa <ConstantInt>(N->getOperand(0)));
baz(cast <ConstantInt>(N->getOperand(1)));
bak(cast_or_null <ConstantInt>(N->getOperand(2)));
bat(dyn_cast <ConstantInt>(N->getOperand(3)));
bay(dyn_cast_or_null<ConstantInt>(N->getOperand(4)));
you can trivially match its semantics with:
MDNode *N = foo();
bar(mdconst::hasa <ConstantInt>(N->getOperand(0)));
baz(mdconst::extract <ConstantInt>(N->getOperand(1)));
bak(mdconst::extract_or_null <ConstantInt>(N->getOperand(2)));
bat(mdconst::dyn_extract <ConstantInt>(N->getOperand(3)));
bay(mdconst::dyn_extract_or_null<ConstantInt>(N->getOperand(4)));
and when you transition your metadata schema to `MDInt`:
MDNode *N = foo();
bar(isa <MDInt>(N->getOperand(0)));
baz(cast <MDInt>(N->getOperand(1)));
bak(cast_or_null <MDInt>(N->getOperand(2)));
bat(dyn_cast <MDInt>(N->getOperand(3)));
bay(dyn_cast_or_null<MDInt>(N->getOperand(4)));
- A `CallInst` -- specifically, intrinsic instructions -- can refer to
metadata through a bridge called `MetadataAsValue`. This is a
subclass of `Value` where `getType()->isMetadataTy()`.
`MetadataAsValue` is the *only* class that can legally refer to a
`LocalAsMetadata`, which is a bridged form of non-`Constant` values
like `Argument` and `Instruction`. It can also refer to any other
`Metadata` subclass.
(I'll break all your testcases in a follow-up commit, when I propagate
this change to assembly.)
llvm-svn: 223802
2014-12-10 02:38:53 +08:00
|
|
|
ConstantInt *CITrue =
|
|
|
|
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
|
|
|
|
ConstantInt *CIFalse =
|
|
|
|
mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
|
2014-12-10 01:32:12 +08:00
|
|
|
if (!CITrue || !CIFalse) return false;
|
|
|
|
ProbTrue = CITrue->getValue().getZExtValue();
|
|
|
|
ProbFalse = CIFalse->getValue().getZExtValue();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if the given instruction is available
|
2012-06-13 13:43:29 +08:00
|
|
|
/// in its predecessor block. If yes, the instruction will be removed.
|
2012-07-13 21:25:15 +08:00
|
|
|
static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
|
2012-06-13 13:43:29 +08:00
|
|
|
if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
|
|
|
|
return false;
|
|
|
|
for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) {
|
|
|
|
Instruction *PBI = &*I;
|
|
|
|
// Check whether Inst and PBI generate the same value.
|
|
|
|
if (Inst->isIdenticalTo(PBI)) {
|
|
|
|
Inst->replaceAllUsesWith(PBI);
|
|
|
|
Inst->eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2012-01-25 17:43:14 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If this basic block is simple enough, and if a predecessor branches to us
|
|
|
|
/// and one of our successors, fold the block into the predecessor and use
|
|
|
|
/// logical operations to pick the right destination.
|
2015-03-10 10:37:25 +08:00
|
|
|
bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
|
2008-07-14 06:23:11 +08:00
|
|
|
BasicBlock *BB = BI->getParent();
|
2011-05-20 04:52:46 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
Instruction *Cond = nullptr;
|
2012-06-13 13:43:29 +08:00
|
|
|
if (BI->isConditional())
|
|
|
|
Cond = dyn_cast<Instruction>(BI->getCondition());
|
|
|
|
else {
|
|
|
|
// For unconditional branch, check for a simple CFG pattern, where
|
|
|
|
// BB has a single predecessor and BB's successor is also its predecessor's
|
|
|
|
// successor. If such pattern exists, check for CSE between BB and its
|
|
|
|
// predecessor.
|
|
|
|
if (BasicBlock *PB = BB->getSinglePredecessor())
|
|
|
|
if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
|
|
|
|
if (PBI->isConditional() &&
|
|
|
|
(BI->getSuccessor(0) == PBI->getSuccessor(0) ||
|
|
|
|
BI->getSuccessor(0) == PBI->getSuccessor(1))) {
|
|
|
|
for (BasicBlock::iterator I = BB->begin(), E = BB->end();
|
|
|
|
I != E; ) {
|
|
|
|
Instruction *Curr = I++;
|
|
|
|
if (isa<CmpInst>(Curr)) {
|
|
|
|
Cond = Curr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// Quit if we can't remove this instruction.
|
|
|
|
if (!checkCSEInPredecessor(Curr, PB))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!Cond)
|
2012-06-13 13:43:29 +08:00
|
|
|
return false;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
|
|
|
|
Cond->getParent() != BB || !Cond->hasOneUse())
|
2010-07-15 03:52:16 +08:00
|
|
|
return false;
|
2011-04-07 06:37:20 +08:00
|
|
|
|
2008-07-14 05:12:01 +08:00
|
|
|
// Make sure the instruction after the condition is the cond branch.
|
|
|
|
BasicBlock::iterator CondIt = Cond; ++CondIt;
|
2011-04-14 10:44:53 +08:00
|
|
|
|
2014-07-07 07:10:24 +08:00
|
|
|
// Ignore dbg intrinsics.
|
2011-04-14 10:44:53 +08:00
|
|
|
while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2011-04-14 10:44:53 +08:00
|
|
|
if (&*CondIt != BI)
|
2008-07-14 05:12:01 +08:00
|
|
|
return false;
|
2009-01-20 07:03:13 +08:00
|
|
|
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
// Only allow this transformation if computing the condition doesn't involve
|
|
|
|
// too many instructions and these involved instructions can be executed
|
|
|
|
// unconditionally. We denote all involved instructions except the condition
|
|
|
|
// as "bonus instructions", and only allow this transformation when the
|
|
|
|
// number of the bonus instructions does not exceed a certain threshold.
|
|
|
|
unsigned NumBonusInsts = 0;
|
|
|
|
for (auto I = BB->begin(); Cond != I; ++I) {
|
|
|
|
// Ignore dbg intrinsics.
|
|
|
|
if (isa<DbgInfoIntrinsic>(I))
|
|
|
|
continue;
|
2015-03-10 10:37:25 +08:00
|
|
|
if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I))
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
return false;
|
|
|
|
// I has only one use and can be executed unconditionally.
|
|
|
|
Instruction *User = dyn_cast<Instruction>(I->user_back());
|
|
|
|
if (User == nullptr || User->getParent() != BB)
|
|
|
|
return false;
|
|
|
|
// I is used in the same BB. Since BI uses Cond and doesn't have more slots
|
|
|
|
// to use any other instruction, User must be an instruction between next(I)
|
|
|
|
// and Cond.
|
|
|
|
++NumBonusInsts;
|
|
|
|
// Exit early once we reach the limit.
|
|
|
|
if (NumBonusInsts > BonusInstThreshold)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-01-20 07:03:13 +08:00
|
|
|
// Cond is known to be a compare or binary operator. Check to make sure that
|
|
|
|
// neither operand is a potentially-trapping constant expression.
|
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
|
|
|
|
if (CE->canTrap())
|
|
|
|
return false;
|
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
|
|
|
|
if (CE->canTrap())
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 05:12:01 +08:00
|
|
|
// Finally, don't infinitely unroll conditional loops.
|
|
|
|
BasicBlock *TrueDest = BI->getSuccessor(0);
|
2014-04-25 13:29:35 +08:00
|
|
|
BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
|
2008-07-14 05:12:01 +08:00
|
|
|
if (TrueDest == BB || FalseDest == BB)
|
|
|
|
return false;
|
2011-04-07 06:37:20 +08:00
|
|
|
|
2014-07-22 01:06:51 +08:00
|
|
|
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
|
|
|
|
BasicBlock *PredBlock = *PI;
|
2008-07-14 05:12:01 +08:00
|
|
|
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:23:11 +08:00
|
|
|
// Check that we have two conditional branches. If there is a PHI node in
|
|
|
|
// the common successor, verify that the same value flows in from both
|
|
|
|
// blocks.
|
2012-06-13 13:43:29 +08:00
|
|
|
SmallVector<PHINode*, 4> PHIs;
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!PBI || PBI->isUnconditional() ||
|
2012-08-30 05:46:36 +08:00
|
|
|
(BI->isConditional() &&
|
2012-06-13 13:43:29 +08:00
|
|
|
!SafeToMergeTerminators(BI, PBI)) ||
|
|
|
|
(!BI->isConditional() &&
|
|
|
|
!isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
|
2008-07-14 05:12:01 +08:00
|
|
|
continue;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2011-04-14 10:44:53 +08:00
|
|
|
// Determine if the two branches share a common destination.
|
2012-09-17 22:20:57 +08:00
|
|
|
Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
|
2011-04-14 10:44:53 +08:00
|
|
|
bool InvertPredCond = false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2012-06-13 13:43:29 +08:00
|
|
|
if (BI->isConditional()) {
|
|
|
|
if (PBI->getSuccessor(0) == TrueDest)
|
|
|
|
Opc = Instruction::Or;
|
|
|
|
else if (PBI->getSuccessor(1) == FalseDest)
|
|
|
|
Opc = Instruction::And;
|
|
|
|
else if (PBI->getSuccessor(0) == FalseDest)
|
|
|
|
Opc = Instruction::And, InvertPredCond = true;
|
|
|
|
else if (PBI->getSuccessor(1) == TrueDest)
|
|
|
|
Opc = Instruction::Or, InvertPredCond = true;
|
|
|
|
else
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
|
|
|
|
continue;
|
|
|
|
}
|
2011-04-14 10:44:53 +08:00
|
|
|
|
2010-01-05 09:26:52 +08:00
|
|
|
DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
|
2012-08-30 05:46:36 +08:00
|
|
|
IRBuilder<> Builder(PBI);
|
2011-05-20 04:52:46 +08:00
|
|
|
|
2008-07-14 05:20:19 +08:00
|
|
|
// If we need to invert the condition in the pred block to match, do so now.
|
|
|
|
if (InvertPredCond) {
|
Make simplifycfg reprocess newly formed "br (cond1 | cond2)" conditions
when simplifying, allowing them to be eagerly turned into switches. This
is the last step required to get "Example 7" from this blog post:
http://blog.regehr.org/archives/320
On X86, we now generate this machine code, which (to my eye) seems better
than the ICC generated code:
_crud: ## @crud
## BB#0: ## %entry
cmpb $33, %dil
jb LBB0_4
## BB#1: ## %switch.early.test
addb $-34, %dil
cmpb $58, %dil
ja LBB0_3
## BB#2: ## %switch.early.test
movzbl %dil, %eax
movabsq $288230376537592865, %rcx ## imm = 0x400000017001421
btq %rax, %rcx
jb LBB0_4
LBB0_3: ## %lor.rhs
xorl %eax, %eax
ret
LBB0_4: ## %lor.end
movl $1, %eax
ret
llvm-svn: 121690
2010-12-13 15:00:06 +08:00
|
|
|
Value *NewCond = PBI->getCondition();
|
2012-08-30 05:46:36 +08:00
|
|
|
|
Make simplifycfg reprocess newly formed "br (cond1 | cond2)" conditions
when simplifying, allowing them to be eagerly turned into switches. This
is the last step required to get "Example 7" from this blog post:
http://blog.regehr.org/archives/320
On X86, we now generate this machine code, which (to my eye) seems better
than the ICC generated code:
_crud: ## @crud
## BB#0: ## %entry
cmpb $33, %dil
jb LBB0_4
## BB#1: ## %switch.early.test
addb $-34, %dil
cmpb $58, %dil
ja LBB0_3
## BB#2: ## %switch.early.test
movzbl %dil, %eax
movabsq $288230376537592865, %rcx ## imm = 0x400000017001421
btq %rax, %rcx
jb LBB0_4
LBB0_3: ## %lor.rhs
xorl %eax, %eax
ret
LBB0_4: ## %lor.end
movl $1, %eax
ret
llvm-svn: 121690
2010-12-13 15:00:06 +08:00
|
|
|
if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
|
|
|
|
CmpInst *CI = cast<CmpInst>(NewCond);
|
|
|
|
CI->setPredicate(CI->getInversePredicate());
|
|
|
|
} else {
|
2012-08-30 05:46:36 +08:00
|
|
|
NewCond = Builder.CreateNot(NewCond,
|
2011-05-20 04:52:46 +08:00
|
|
|
PBI->getCondition()->getName()+".not");
|
Make simplifycfg reprocess newly formed "br (cond1 | cond2)" conditions
when simplifying, allowing them to be eagerly turned into switches. This
is the last step required to get "Example 7" from this blog post:
http://blog.regehr.org/archives/320
On X86, we now generate this machine code, which (to my eye) seems better
than the ICC generated code:
_crud: ## @crud
## BB#0: ## %entry
cmpb $33, %dil
jb LBB0_4
## BB#1: ## %switch.early.test
addb $-34, %dil
cmpb $58, %dil
ja LBB0_3
## BB#2: ## %switch.early.test
movzbl %dil, %eax
movabsq $288230376537592865, %rcx ## imm = 0x400000017001421
btq %rax, %rcx
jb LBB0_4
LBB0_3: ## %lor.rhs
xorl %eax, %eax
ret
LBB0_4: ## %lor.end
movl $1, %eax
ret
llvm-svn: 121690
2010-12-13 15:00:06 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 05:12:01 +08:00
|
|
|
PBI->setCondition(NewCond);
|
2011-12-27 04:54:14 +08:00
|
|
|
PBI->swapSuccessors();
|
2008-07-14 05:12:01 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
// If we have bonus instructions, clone them into the predecessor block.
|
2015-06-25 04:07:50 +08:00
|
|
|
// Note that there may be multiple predecessor blocks, so we cannot move
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
// bonus instructions to a predecessor block.
|
|
|
|
ValueToValueMapTy VMap; // maps original values to cloned values
|
|
|
|
// We already make sure Cond is the last instruction before BI. Therefore,
|
2015-06-25 04:07:50 +08:00
|
|
|
// all instructions before Cond other than DbgInfoIntrinsic are bonus
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
// instructions.
|
|
|
|
for (auto BonusInst = BB->begin(); Cond != BonusInst; ++BonusInst) {
|
|
|
|
if (isa<DbgInfoIntrinsic>(BonusInst))
|
|
|
|
continue;
|
|
|
|
Instruction *NewBonusInst = BonusInst->clone();
|
|
|
|
RemapInstruction(NewBonusInst, VMap,
|
|
|
|
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
|
|
|
|
VMap[BonusInst] = NewBonusInst;
|
2014-01-29 00:56:46 +08:00
|
|
|
|
|
|
|
// If we moved a load, we cannot any longer claim any knowledge about
|
|
|
|
// its potential value. The previous information might have been valid
|
|
|
|
// only given the branch precondition.
|
|
|
|
// For an analogous reason, we must also drop all the metadata whose
|
|
|
|
// semantics we don't understand.
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg);
|
2014-01-29 00:56:46 +08:00
|
|
|
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "(i | c) ^ d > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
PredBlock->getInstList().insert(PBI, NewBonusInst);
|
|
|
|
NewBonusInst->takeName(BonusInst);
|
|
|
|
BonusInst->setName(BonusInst->getName() + ".old");
|
2010-07-15 03:52:16 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 05:20:19 +08:00
|
|
|
// Clone Cond into the predecessor basic block, and or/and the
|
|
|
|
// two conditions together.
|
2009-09-27 15:38:41 +08:00
|
|
|
Instruction *New = Cond->clone();
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "((i | c) ^ d) > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
RemapInstruction(New, VMap,
|
|
|
|
RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
|
2008-07-14 05:20:19 +08:00
|
|
|
PredBlock->getInstList().insert(PBI, New);
|
|
|
|
New->takeName(Cond);
|
[SimplifyCFG] threshold for folding branches with common destination
Summary:
This patch adds a threshold that controls the number of bonus instructions
allowed for folding branches with common destination. The original code allows
at most one bonus instruction. With this patch, users can customize the
threshold to allow multiple bonus instructions. The default threshold is still
1, so that the code behaves the same as before when users do not specify this
threshold.
The motivation of this change is that tuning this threshold significantly (up
to 25%) improves the performance of some CUDA programs in our internal code
base. In general, branch instructions are very expensive for GPU programs.
Therefore, it is sometimes worth trading more arithmetic computation for a more
straightened control flow. Here's a reduced example:
__global__ void foo(int a, int b, int c, int d, int e, int n,
const int *input, int *output) {
int sum = 0;
for (int i = 0; i < n; ++i)
sum += (((i ^ a) > b) && (((i | c ) ^ d) > e)) ? 0 : input[i];
*output = sum;
}
The select statement in the loop body translates to two branch instructions "if
((i ^ a) > b)" and "if (((i | c) ^ d) > e)" which share a common destination.
With the default threshold, SimplifyCFG is unable to fold them, because
computing the condition of the second branch "((i | c) ^ d) > e" requires two
bonus instructions. With the threshold increased, SimplifyCFG can fold the two
branches so that the loop body contains only one branch, making the code
conceptually look like:
sum += (((i ^ a) > b) & (((i | c ) ^ d) > e)) ? 0 : input[i];
Increasing the threshold significantly improves the performance of this
particular example. In the configuration where both conditions are guaranteed
to be true, increasing the threshold from 1 to 2 improves the performance by
18.24%. Even in the configuration where the first condition is false and the
second condition is true, which favors shortcuts, increasing the threshold from
1 to 2 still improves the performance by 4.35%.
We are still looking for a good threshold and maybe a better cost model than
just counting the number of bonus instructions. However, according to the above
numbers, we think it is at least worth adding a threshold to enable more
experiments and tuning. Let me know what you think. Thanks!
Test Plan: Added one test case to check the threshold is in effect
Reviewers: nadav, eliben, meheff, resistor, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D5529
llvm-svn: 218711
2014-10-01 06:23:38 +08:00
|
|
|
Cond->setName(New->getName() + ".old");
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2012-06-13 13:43:29 +08:00
|
|
|
if (BI->isConditional()) {
|
2012-08-30 05:46:36 +08:00
|
|
|
Instruction *NewCond =
|
2012-06-13 13:43:29 +08:00
|
|
|
cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
|
2011-05-20 04:52:46 +08:00
|
|
|
New, "or.cond"));
|
2012-06-13 13:43:29 +08:00
|
|
|
PBI->setCondition(NewCond);
|
|
|
|
|
2012-09-15 08:39:57 +08:00
|
|
|
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
|
2014-12-10 01:32:12 +08:00
|
|
|
bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
|
|
|
|
PredFalseWeight);
|
|
|
|
bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
|
|
|
|
SuccFalseWeight);
|
2012-09-15 08:39:57 +08:00
|
|
|
SmallVector<uint64_t, 8> NewWeights;
|
|
|
|
|
2012-06-13 13:43:29 +08:00
|
|
|
if (PBI->getSuccessor(0) == BB) {
|
2012-09-15 08:39:57 +08:00
|
|
|
if (PredHasWeights && SuccHasWeights) {
|
|
|
|
// PBI: br i1 %x, BB, FalseDest
|
|
|
|
// BI: br i1 %y, TrueDest, FalseDest
|
|
|
|
//TrueWeight is TrueWeight for PBI * TrueWeight for BI.
|
|
|
|
NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
|
|
|
|
//FalseWeight is FalseWeight for PBI * TotalWeight for BI +
|
|
|
|
// TrueWeight for PBI * FalseWeight for BI.
|
|
|
|
// We assume that total weights of a BranchInst can fit into 32 bits.
|
|
|
|
// Therefore, we will not have overflow using 64-bit arithmetic.
|
|
|
|
NewWeights.push_back(PredFalseWeight * (SuccFalseWeight +
|
|
|
|
SuccTrueWeight) + PredTrueWeight * SuccFalseWeight);
|
|
|
|
}
|
2012-06-13 13:43:29 +08:00
|
|
|
AddPredecessorToBlock(TrueDest, PredBlock, BB);
|
|
|
|
PBI->setSuccessor(0, TrueDest);
|
|
|
|
}
|
|
|
|
if (PBI->getSuccessor(1) == BB) {
|
2012-09-15 08:39:57 +08:00
|
|
|
if (PredHasWeights && SuccHasWeights) {
|
|
|
|
// PBI: br i1 %x, TrueDest, BB
|
|
|
|
// BI: br i1 %y, TrueDest, FalseDest
|
|
|
|
//TrueWeight is TrueWeight for PBI * TotalWeight for BI +
|
|
|
|
// FalseWeight for PBI * TrueWeight for BI.
|
|
|
|
NewWeights.push_back(PredTrueWeight * (SuccFalseWeight +
|
|
|
|
SuccTrueWeight) + PredFalseWeight * SuccTrueWeight);
|
|
|
|
//FalseWeight is FalseWeight for PBI * FalseWeight for BI.
|
|
|
|
NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
|
|
|
|
}
|
2012-06-13 13:43:29 +08:00
|
|
|
AddPredecessorToBlock(FalseDest, PredBlock, BB);
|
|
|
|
PBI->setSuccessor(1, FalseDest);
|
|
|
|
}
|
2012-09-15 08:39:57 +08:00
|
|
|
if (NewWeights.size() == 2) {
|
|
|
|
// Halve the weights if any of them cannot fit in an uint32_t
|
|
|
|
FitWeights(NewWeights);
|
|
|
|
|
|
|
|
SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end());
|
|
|
|
PBI->setMetadata(LLVMContext::MD_prof,
|
|
|
|
MDBuilder(BI->getContext()).
|
|
|
|
createBranchWeights(MDWeights));
|
|
|
|
} else
|
2014-04-25 13:29:35 +08:00
|
|
|
PBI->setMetadata(LLVMContext::MD_prof, nullptr);
|
2012-06-13 13:43:29 +08:00
|
|
|
} else {
|
|
|
|
// Update PHI nodes in the common successors.
|
|
|
|
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
|
2012-06-24 18:15:42 +08:00
|
|
|
ConstantInt *PBI_C = cast<ConstantInt>(
|
2012-06-13 13:43:29 +08:00
|
|
|
PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
|
|
|
|
assert(PBI_C->getType()->isIntegerTy(1));
|
2014-04-25 13:29:35 +08:00
|
|
|
Instruction *MergedCond = nullptr;
|
2012-06-13 13:43:29 +08:00
|
|
|
if (PBI->getSuccessor(0) == TrueDest) {
|
|
|
|
// Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
|
|
|
|
// PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
|
|
|
|
// is false: !PBI_Cond and BI_Value
|
|
|
|
Instruction *NotCond =
|
|
|
|
cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
|
|
|
|
"not.cond"));
|
|
|
|
MergedCond =
|
|
|
|
cast<Instruction>(Builder.CreateBinOp(Instruction::And,
|
|
|
|
NotCond, New,
|
|
|
|
"and.cond"));
|
|
|
|
if (PBI_C->isOne())
|
|
|
|
MergedCond =
|
|
|
|
cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
|
|
|
|
PBI->getCondition(), MergedCond,
|
|
|
|
"or.cond"));
|
|
|
|
} else {
|
|
|
|
// Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
|
|
|
|
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
|
|
|
|
// is false: PBI_Cond and BI_Value
|
2012-08-30 05:46:36 +08:00
|
|
|
MergedCond =
|
2012-06-13 13:43:29 +08:00
|
|
|
cast<Instruction>(Builder.CreateBinOp(Instruction::And,
|
|
|
|
PBI->getCondition(), New,
|
|
|
|
"and.cond"));
|
|
|
|
if (PBI_C->isOne()) {
|
|
|
|
Instruction *NotCond =
|
|
|
|
cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
|
|
|
|
"not.cond"));
|
2012-08-30 05:46:36 +08:00
|
|
|
MergedCond =
|
2012-06-13 13:43:29 +08:00
|
|
|
cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
|
|
|
|
NotCond, MergedCond,
|
|
|
|
"or.cond"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Update PHI Node.
|
|
|
|
PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
|
|
|
|
MergedCond);
|
|
|
|
}
|
|
|
|
// Change PBI from Conditional to Unconditional.
|
|
|
|
BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
|
|
|
|
EraseTerminatorInstAndDCECond(PBI);
|
|
|
|
PBI = New_PBI;
|
2008-07-14 05:12:01 +08:00
|
|
|
}
|
2011-04-07 06:37:20 +08:00
|
|
|
|
2011-12-27 12:31:52 +08:00
|
|
|
// TODO: If BB is reachable from all paths through PredBlock, then we
|
|
|
|
// could replace PBI's branch probabilities with BI's.
|
|
|
|
|
2011-04-14 10:44:53 +08:00
|
|
|
// Copy any debug value intrinsics into the end of PredBlock.
|
|
|
|
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
|
|
|
|
if (isa<DbgInfoIntrinsic>(*I))
|
|
|
|
I->clone()->insertBefore(PBI);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 13:57:30 +08:00
|
|
|
return true;
|
2008-07-14 05:12:01 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If we have a conditional branch as a predecessor of another block,
|
|
|
|
/// this function tries to simplify it. We know
|
2008-07-14 05:53:26 +08:00
|
|
|
/// that PBI and BI are both conditional branches, and BI is in one of the
|
|
|
|
/// successor blocks of PBI - PBI branches to BI.
|
|
|
|
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
|
|
|
|
assert(PBI->isConditional() && BI->isConditional());
|
|
|
|
BasicBlock *BB = BI->getParent();
|
2009-08-13 00:23:25 +08:00
|
|
|
|
2008-07-14 05:53:26 +08:00
|
|
|
// If this block ends with a branch instruction, and if there is a
|
2012-08-30 05:46:36 +08:00
|
|
|
// predecessor that ends on a branch of the same condition, make
|
2008-07-14 05:53:26 +08:00
|
|
|
// this conditional branch redundant.
|
|
|
|
if (PBI->getCondition() == BI->getCondition() &&
|
|
|
|
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
|
|
|
|
// Okay, the outcome of this conditional branch is statically
|
|
|
|
// knowable. If this block had a single pred, handle specially.
|
|
|
|
if (BB->getSinglePredecessor()) {
|
|
|
|
// Turn this into a branch on constant.
|
|
|
|
bool CondIsTrue = PBI->getSuccessor(0) == BB;
|
2012-08-30 05:46:36 +08:00
|
|
|
BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
|
2009-08-14 05:58:54 +08:00
|
|
|
CondIsTrue));
|
2008-07-14 05:53:26 +08:00
|
|
|
return true; // Nuke the branch on constant.
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 05:53:26 +08:00
|
|
|
// Otherwise, if there are multiple predecessors, insert a PHI that merges
|
|
|
|
// in the constant and simplify the block result. Subsequent passes of
|
|
|
|
// simplifycfg will thread the block.
|
|
|
|
if (BlockIsSimpleEnoughToThreadThrough(BB)) {
|
2011-03-30 19:19:20 +08:00
|
|
|
pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
|
2009-08-14 05:58:54 +08:00
|
|
|
PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),
|
2011-03-30 19:28:46 +08:00
|
|
|
std::distance(PB, PE),
|
2008-07-14 05:53:26 +08:00
|
|
|
BI->getCondition()->getName() + ".pr",
|
|
|
|
BB->begin());
|
2008-07-14 05:55:46 +08:00
|
|
|
// Okay, we're going to insert the PHI node. Since PBI is not the only
|
|
|
|
// predecessor, compute the PHI'd conditional value for all of the preds.
|
|
|
|
// Any predecessor where the condition is not computable we keep symbolic.
|
2011-03-30 19:19:20 +08:00
|
|
|
for (pred_iterator PI = PB; PI != PE; ++PI) {
|
2010-07-12 18:59:23 +08:00
|
|
|
BasicBlock *P = *PI;
|
|
|
|
if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
|
2008-07-14 05:53:26 +08:00
|
|
|
PBI != BI && PBI->isConditional() &&
|
|
|
|
PBI->getCondition() == BI->getCondition() &&
|
|
|
|
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
|
|
|
|
bool CondIsTrue = PBI->getSuccessor(0) == BB;
|
2012-08-30 05:46:36 +08:00
|
|
|
NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
|
2010-07-12 18:59:23 +08:00
|
|
|
CondIsTrue), P);
|
2008-07-14 05:53:26 +08:00
|
|
|
} else {
|
2010-07-12 18:59:23 +08:00
|
|
|
NewPN->addIncoming(BI->getCondition(), P);
|
2008-07-14 05:53:26 +08:00
|
|
|
}
|
2010-07-12 18:59:23 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 05:53:26 +08:00
|
|
|
BI->setCondition(NewPN);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 05:53:26 +08:00
|
|
|
// If this is a conditional branch in an empty block, and if any
|
2014-07-07 07:10:24 +08:00
|
|
|
// predecessors are a conditional branch to one of our destinations,
|
2008-07-14 05:53:26 +08:00
|
|
|
// fold the conditions into logical ops and one cond br.
|
2009-02-26 14:56:37 +08:00
|
|
|
BasicBlock::iterator BBI = BB->begin();
|
|
|
|
// Ignore dbg intrinsics.
|
|
|
|
while (isa<DbgInfoIntrinsic>(BBI))
|
|
|
|
++BBI;
|
|
|
|
if (&*BBI != BI)
|
2008-07-14 06:04:41 +08:00
|
|
|
return false;
|
2009-01-20 09:15:41 +08:00
|
|
|
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2009-01-20 09:15:41 +08:00
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
|
|
|
|
if (CE->canTrap())
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
int PBIOp, BIOp;
|
|
|
|
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
|
|
|
|
PBIOp = BIOp = 0;
|
|
|
|
else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
|
|
|
|
PBIOp = 0, BIOp = 1;
|
|
|
|
else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
|
|
|
|
PBIOp = 1, BIOp = 0;
|
|
|
|
else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
|
|
|
|
PBIOp = BIOp = 1;
|
|
|
|
else
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// Check to make sure that the other destination of this branch
|
|
|
|
// isn't BB itself. If so, this is an infinite loop that will
|
|
|
|
// keep getting unwound.
|
|
|
|
if (PBI->getSuccessor(PBIOp) == BB)
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
|
|
|
// Do not perform this transformation if it would require
|
2008-07-14 06:04:41 +08:00
|
|
|
// insertion of a large number of select instructions. For targets
|
|
|
|
// without predication/cmovs, this is a big pessimization.
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-07-08 05:19:00 +08:00
|
|
|
// Also do not perform this transformation if any phi node in the common
|
|
|
|
// destination block can trap when reached by BB or PBB (PR17073). In that
|
|
|
|
// case, it would be unsafe to hoist the operation into a select instruction.
|
|
|
|
|
|
|
|
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
|
2008-07-14 06:04:41 +08:00
|
|
|
unsigned NumPhis = 0;
|
|
|
|
for (BasicBlock::iterator II = CommonDest->begin();
|
2014-07-08 05:19:00 +08:00
|
|
|
isa<PHINode>(II); ++II, ++NumPhis) {
|
2008-07-14 06:04:41 +08:00
|
|
|
if (NumPhis > 2) // Disable this xform.
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-07-08 05:19:00 +08:00
|
|
|
PHINode *PN = cast<PHINode>(II);
|
|
|
|
Value *BIV = PN->getIncomingValueForBlock(BB);
|
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
|
|
|
|
if (CE->canTrap())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
|
|
|
|
Value *PBIV = PN->getIncomingValue(PBBIdx);
|
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
|
|
|
|
if (CE->canTrap())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// Finally, if everything is ok, fold the branches to logical ops.
|
2014-07-08 05:19:00 +08:00
|
|
|
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-01-05 09:26:52 +08:00
|
|
|
DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
|
2009-08-23 12:37:46 +08:00
|
|
|
<< "AND: " << *BI->getParent());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
|
|
|
|
2008-07-14 06:23:11 +08:00
|
|
|
// If OtherDest *is* BB, then BB is a basic block with a single conditional
|
|
|
|
// branch in it, where one edge (OtherDest) goes back to itself but the other
|
|
|
|
// exits. We don't *know* that the program avoids the infinite loop
|
|
|
|
// (even though that seems likely). If we do this xform naively, we'll end up
|
|
|
|
// recursively unpeeling the loop. Since we know that (after the xform is
|
|
|
|
// done) that the block *is* infinite if reached, we just make it an obviously
|
|
|
|
// infinite loop with no cond branch.
|
|
|
|
if (OtherDest == BB) {
|
|
|
|
// Insert it at the end of the function, because it's either code,
|
|
|
|
// or it won't matter if it's hot. :)
|
2009-08-14 05:58:54 +08:00
|
|
|
BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(),
|
|
|
|
"infloop", BB->getParent());
|
2008-07-14 06:23:11 +08:00
|
|
|
BranchInst::Create(InfLoopBlock, InfLoopBlock);
|
|
|
|
OtherDest = InfLoopBlock;
|
2012-08-30 05:46:36 +08:00
|
|
|
}
|
|
|
|
|
2010-01-05 09:26:52 +08:00
|
|
|
DEBUG(dbgs() << *PBI->getParent()->getParent());
|
2011-05-20 04:52:46 +08:00
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// BI may have other predecessors. Because of this, we leave
|
|
|
|
// it alone, but modify PBI.
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// Make sure we get to CommonDest on True&True directions.
|
|
|
|
Value *PBICond = PBI->getCondition();
|
2011-05-20 04:52:46 +08:00
|
|
|
IRBuilder<true, NoFolder> Builder(PBI);
|
2008-07-14 06:04:41 +08:00
|
|
|
if (PBIOp)
|
2011-05-20 04:52:46 +08:00
|
|
|
PBICond = Builder.CreateNot(PBICond, PBICond->getName()+".not");
|
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
Value *BICond = BI->getCondition();
|
|
|
|
if (BIOp)
|
2011-05-20 04:52:46 +08:00
|
|
|
BICond = Builder.CreateNot(BICond, BICond->getName()+".not");
|
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// Merge the conditions.
|
2011-05-20 04:52:46 +08:00
|
|
|
Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// Modify PBI to branch on the new condition to the new dests.
|
|
|
|
PBI->setCondition(Cond);
|
|
|
|
PBI->setSuccessor(0, CommonDest);
|
|
|
|
PBI->setSuccessor(1, OtherDest);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2012-09-18 05:30:40 +08:00
|
|
|
// Update branch weight for PBI.
|
|
|
|
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
|
2014-12-10 01:32:12 +08:00
|
|
|
bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight,
|
|
|
|
PredFalseWeight);
|
|
|
|
bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight,
|
|
|
|
SuccFalseWeight);
|
2012-09-18 05:30:40 +08:00
|
|
|
if (PredHasWeights && SuccHasWeights) {
|
|
|
|
uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
|
|
|
|
uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight;
|
|
|
|
uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
|
|
|
|
uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
|
|
|
|
// The weight to CommonDest should be PredCommon * SuccTotal +
|
|
|
|
// PredOther * SuccCommon.
|
|
|
|
// The weight to OtherDest should be PredOther * SuccOther.
|
2015-02-19 23:26:17 +08:00
|
|
|
uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
|
|
|
|
PredOther * SuccCommon,
|
|
|
|
PredOther * SuccOther};
|
2012-09-18 05:30:40 +08:00
|
|
|
// Halve the weights if any of them cannot fit in an uint32_t
|
|
|
|
FitWeights(NewWeights);
|
|
|
|
|
|
|
|
PBI->setMetadata(LLVMContext::MD_prof,
|
2015-02-19 23:26:17 +08:00
|
|
|
MDBuilder(BI->getContext())
|
|
|
|
.createBranchWeights(NewWeights[0], NewWeights[1]));
|
2012-09-18 05:30:40 +08:00
|
|
|
}
|
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// OtherDest may have phi nodes. If so, add an entry from PBI's
|
|
|
|
// block that are identical to the entries for BI's block.
|
2010-12-14 15:09:42 +08:00
|
|
|
AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// We know that the CommonDest already had an edge from PBI to
|
|
|
|
// it. If it has PHIs though, the PHIs may have different
|
|
|
|
// entries for BB and PBI's BB. If so, insert a select to make
|
|
|
|
// them agree.
|
2010-12-14 15:09:42 +08:00
|
|
|
PHINode *PN;
|
2008-07-14 06:04:41 +08:00
|
|
|
for (BasicBlock::iterator II = CommonDest->begin();
|
|
|
|
(PN = dyn_cast<PHINode>(II)); ++II) {
|
|
|
|
Value *BIV = PN->getIncomingValueForBlock(BB);
|
|
|
|
unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
|
|
|
|
Value *PBIV = PN->getIncomingValue(PBBIdx);
|
|
|
|
if (BIV != PBIV) {
|
|
|
|
// Insert a select in PBI to pick the right value.
|
2011-05-20 04:52:46 +08:00
|
|
|
Value *NV = cast<SelectInst>
|
|
|
|
(Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName()+".mux"));
|
2008-07-14 06:04:41 +08:00
|
|
|
PN->setIncomingValue(PBBIdx, NV);
|
2008-07-14 05:53:26 +08:00
|
|
|
}
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-01-05 09:26:52 +08:00
|
|
|
DEBUG(dbgs() << "INTO: " << *PBI->getParent());
|
|
|
|
DEBUG(dbgs() << *PBI->getParent()->getParent());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2008-07-14 06:04:41 +08:00
|
|
|
// This basic block is probably dead. We know it has at least
|
|
|
|
// one fewer predecessor.
|
|
|
|
return true;
|
2008-07-14 05:53:26 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
|
|
|
|
// true or to FalseBB if Cond is false.
|
2011-01-11 20:52:11 +08:00
|
|
|
// Takes care of updating the successors and removing the old terminator.
|
|
|
|
// Also makes sure not to introduce new successors by assuming that edges to
|
|
|
|
// non-successor TrueBBs and FalseBBs aren't reachable.
|
|
|
|
static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
|
2012-09-18 06:28:55 +08:00
|
|
|
BasicBlock *TrueBB, BasicBlock *FalseBB,
|
|
|
|
uint32_t TrueWeight,
|
|
|
|
uint32_t FalseWeight){
|
2010-12-06 02:29:03 +08:00
|
|
|
// Remove any superfluous successor edges from the CFG.
|
|
|
|
// First, figure out which successors to preserve.
|
|
|
|
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
|
|
|
|
// successor.
|
|
|
|
BasicBlock *KeepEdge1 = TrueBB;
|
2014-04-25 13:29:35 +08:00
|
|
|
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
|
2010-12-06 02:29:03 +08:00
|
|
|
|
|
|
|
// Then remove the rest.
|
2011-01-11 20:52:11 +08:00
|
|
|
for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
|
|
|
|
BasicBlock *Succ = OldTerm->getSuccessor(I);
|
2010-12-06 02:29:03 +08:00
|
|
|
// Make sure only to keep exactly one copy of each edge.
|
|
|
|
if (Succ == KeepEdge1)
|
2014-04-25 13:29:35 +08:00
|
|
|
KeepEdge1 = nullptr;
|
2010-12-06 02:29:03 +08:00
|
|
|
else if (Succ == KeepEdge2)
|
2014-04-25 13:29:35 +08:00
|
|
|
KeepEdge2 = nullptr;
|
2010-12-06 02:29:03 +08:00
|
|
|
else
|
2011-01-11 20:52:11 +08:00
|
|
|
Succ->removePredecessor(OldTerm->getParent());
|
2010-12-06 02:29:03 +08:00
|
|
|
}
|
|
|
|
|
2011-05-19 02:43:31 +08:00
|
|
|
IRBuilder<> Builder(OldTerm);
|
|
|
|
Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
|
|
|
|
|
2010-12-06 02:29:03 +08:00
|
|
|
// Insert an appropriate new terminator.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!KeepEdge1 && !KeepEdge2) {
|
2010-12-06 02:29:03 +08:00
|
|
|
if (TrueBB == FalseBB)
|
|
|
|
// We were only looking for one successor, and it was present.
|
|
|
|
// Create an unconditional branch to it.
|
2011-05-19 02:43:31 +08:00
|
|
|
Builder.CreateBr(TrueBB);
|
2012-09-18 06:28:55 +08:00
|
|
|
else {
|
2010-12-06 02:29:03 +08:00
|
|
|
// We found both of the successors we were looking for.
|
|
|
|
// Create a conditional branch sharing the condition of the select.
|
2012-09-18 06:28:55 +08:00
|
|
|
BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
|
|
|
|
if (TrueWeight != FalseWeight)
|
|
|
|
NewBI->setMetadata(LLVMContext::MD_prof,
|
|
|
|
MDBuilder(OldTerm->getContext()).
|
|
|
|
createBranchWeights(TrueWeight, FalseWeight));
|
|
|
|
}
|
2010-12-06 02:29:03 +08:00
|
|
|
} else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
|
|
|
|
// Neither of the selected blocks were successors, so this
|
2011-01-11 20:52:11 +08:00
|
|
|
// terminator must be unreachable.
|
|
|
|
new UnreachableInst(OldTerm->getContext(), OldTerm);
|
2010-12-06 02:29:03 +08:00
|
|
|
} else {
|
|
|
|
// One of the selected values was a successor, but the other wasn't.
|
|
|
|
// Insert an unconditional branch to the one that was found;
|
|
|
|
// the edge to the one that wasn't must be unreachable.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!KeepEdge1)
|
2010-12-06 02:29:03 +08:00
|
|
|
// Only TrueBB was found.
|
2011-05-19 02:43:31 +08:00
|
|
|
Builder.CreateBr(TrueBB);
|
2010-12-06 02:29:03 +08:00
|
|
|
else
|
|
|
|
// Only FalseBB was found.
|
2011-05-19 02:43:31 +08:00
|
|
|
Builder.CreateBr(FalseBB);
|
2010-12-06 02:29:03 +08:00
|
|
|
}
|
|
|
|
|
2011-01-11 20:52:11 +08:00
|
|
|
EraseTerminatorInstAndDCECond(OldTerm);
|
2010-12-06 02:29:03 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// Replaces
|
2011-02-28 17:44:07 +08:00
|
|
|
// (switch (select cond, X, Y)) on constant X, Y
|
|
|
|
// with a branch - conditional if X and Y lead to distinct BBs,
|
|
|
|
// unconditional otherwise.
|
|
|
|
static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
|
|
|
|
// Check for constant integer values in the select.
|
|
|
|
ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
|
|
|
|
ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
|
|
|
|
if (!TrueVal || !FalseVal)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Find the relevant condition and destinations.
|
|
|
|
Value *Condition = Select->getCondition();
|
2012-03-08 15:06:20 +08:00
|
|
|
BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
|
|
|
|
BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
|
2011-02-28 17:44:07 +08:00
|
|
|
|
2012-09-18 06:28:55 +08:00
|
|
|
// Get weight for TrueBB and FalseBB.
|
|
|
|
uint32_t TrueWeight = 0, FalseWeight = 0;
|
|
|
|
SmallVector<uint64_t, 8> Weights;
|
|
|
|
bool HasWeights = HasBranchWeights(SI);
|
|
|
|
if (HasWeights) {
|
|
|
|
GetBranchWeights(SI, Weights);
|
|
|
|
if (Weights.size() == 1 + SI->getNumCases()) {
|
|
|
|
TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal).
|
|
|
|
getSuccessorIndex()];
|
|
|
|
FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal).
|
|
|
|
getSuccessorIndex()];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-02-28 17:44:07 +08:00
|
|
|
// Perform the actual simplification.
|
2012-09-18 06:28:55 +08:00
|
|
|
return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB,
|
|
|
|
TrueWeight, FalseWeight);
|
2011-02-28 17:44:07 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// Replaces
|
2011-01-11 20:52:11 +08:00
|
|
|
// (indirectbr (select cond, blockaddress(@fn, BlockA),
|
|
|
|
// blockaddress(@fn, BlockB)))
|
|
|
|
// with
|
|
|
|
// (br cond, BlockA, BlockB).
|
|
|
|
static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
|
|
|
|
// Check that both operands of the select are block addresses.
|
|
|
|
BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
|
|
|
|
BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
|
|
|
|
if (!TBA || !FBA)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Extract the actual blocks.
|
|
|
|
BasicBlock *TrueBB = TBA->getBasicBlock();
|
|
|
|
BasicBlock *FalseBB = FBA->getBasicBlock();
|
|
|
|
|
|
|
|
// Perform the actual simplification.
|
2012-09-18 06:28:55 +08:00
|
|
|
return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
|
|
|
|
0, 0);
|
2011-01-11 20:52:11 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// This is called when we find an icmp instruction
|
|
|
|
/// (a seteq/setne with a constant) as the only instruction in a
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
/// block that ends with an uncond branch. We are looking for a very specific
|
|
|
|
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
|
|
|
|
/// this case, we merge the first two "or's of icmp" into a switch, but then the
|
|
|
|
/// default value goes to an uncond block with a seteq in it, we get something
|
|
|
|
/// like:
|
|
|
|
///
|
|
|
|
/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
|
|
|
|
/// DEFAULT:
|
|
|
|
/// %tmp = icmp eq i8 %A, 92
|
|
|
|
/// br label %end
|
|
|
|
/// end:
|
|
|
|
/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
|
2012-08-30 05:46:36 +08:00
|
|
|
///
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
/// We prefer to split the edge to 'end' so that there is a true/false entry to
|
|
|
|
/// the PHI, merging the third icmp into the switch.
|
2013-01-07 11:53:25 +08:00
|
|
|
static bool TryToSimplifyUncondBranchWithICmpInIt(
|
2015-03-10 10:37:25 +08:00
|
|
|
ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
|
|
|
|
const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
|
|
|
|
AssumptionCache *AC) {
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
BasicBlock *BB = ICI->getParent();
|
2011-05-19 02:28:48 +08:00
|
|
|
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
// If the block has any PHIs in it or the icmp has multiple uses, it is too
|
|
|
|
// complex.
|
|
|
|
if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
|
|
|
|
|
|
|
|
Value *V = ICI->getOperand(0);
|
|
|
|
ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
|
2012-08-30 05:46:36 +08:00
|
|
|
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
// The pattern we're looking for is where our only predecessor is a switch on
|
|
|
|
// 'V' and this block is the default case for the switch. In this case we can
|
|
|
|
// fold the compared value into the switch to simplify things.
|
|
|
|
BasicBlock *Pred = BB->getSinglePredecessor();
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
|
|
|
|
if (SI->getCondition() != V)
|
|
|
|
return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
// If BB is reachable on a non-default case, then we simply know the value of
|
|
|
|
// V in this block. Substitute it and constant fold the icmp instruction
|
|
|
|
// away.
|
|
|
|
if (SI->getDefaultDest() != BB) {
|
|
|
|
ConstantInt *VVal = SI->findCaseDest(BB);
|
|
|
|
assert(VVal && "Should have a unique destination value");
|
|
|
|
ICI->setOperand(0, VVal);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-02-21 08:06:31 +08:00
|
|
|
if (Value *V = SimplifyInstruction(ICI, DL)) {
|
2010-12-14 14:17:25 +08:00
|
|
|
ICI->replaceAllUsesWith(V);
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
ICI->eraseFromParent();
|
|
|
|
}
|
|
|
|
// BB is now empty, so it is likely to simplify away.
|
2015-03-10 10:37:25 +08:00
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 11:43:57 +08:00
|
|
|
// Ok, the block is reachable from the default dest. If the constant we're
|
|
|
|
// comparing exists in one of the other edges, then we can constant fold ICI
|
|
|
|
// and zap it.
|
2012-03-11 14:09:17 +08:00
|
|
|
if (SI->findCaseValue(Cst) != SI->case_default()) {
|
2010-12-13 11:43:57 +08:00
|
|
|
Value *V;
|
|
|
|
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
|
|
|
|
V = ConstantInt::getFalse(BB->getContext());
|
|
|
|
else
|
|
|
|
V = ConstantInt::getTrue(BB->getContext());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 11:43:57 +08:00
|
|
|
ICI->replaceAllUsesWith(V);
|
|
|
|
ICI->eraseFromParent();
|
|
|
|
// BB is now empty, so it is likely to simplify away.
|
2015-03-10 10:37:25 +08:00
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2010-12-13 11:43:57 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
// The use of the icmp has to be in the 'end' block, by the only PHI node in
|
|
|
|
// the block.
|
|
|
|
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
|
2014-03-09 11:16:01 +08:00
|
|
|
PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
|
2014-04-25 13:29:35 +08:00
|
|
|
if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
isa<PHINode>(++BasicBlock::iterator(PHIUse)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// If the icmp is a SETEQ, then the default dest gets false, the new edge gets
|
|
|
|
// true in the PHI.
|
|
|
|
Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
|
|
|
|
Constant *NewCst = ConstantInt::getFalse(BB->getContext());
|
|
|
|
|
|
|
|
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
|
|
|
|
std::swap(DefaultCst, NewCst);
|
|
|
|
|
|
|
|
// Replace ICI (which is used by the PHI for the default value) with true or
|
|
|
|
// false depending on if it is EQ or NE.
|
|
|
|
ICI->replaceAllUsesWith(DefaultCst);
|
|
|
|
ICI->eraseFromParent();
|
|
|
|
|
|
|
|
// Okay, the switch goes to this block on a default value. Add an edge from
|
|
|
|
// the switch to the merge point on the compared value.
|
|
|
|
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
|
|
|
|
BB->getParent(), BB);
|
2012-09-18 07:07:43 +08:00
|
|
|
SmallVector<uint64_t, 8> Weights;
|
|
|
|
bool HasWeights = HasBranchWeights(SI);
|
|
|
|
if (HasWeights) {
|
|
|
|
GetBranchWeights(SI, Weights);
|
|
|
|
if (Weights.size() == 1 + SI->getNumCases()) {
|
|
|
|
// Split weight for default case to case for "Cst".
|
|
|
|
Weights[0] = (Weights[0]+1) >> 1;
|
|
|
|
Weights.push_back(Weights[0]);
|
|
|
|
|
|
|
|
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
|
|
|
|
SI->setMetadata(LLVMContext::MD_prof,
|
|
|
|
MDBuilder(SI->getContext()).
|
|
|
|
createBranchWeights(MDWeights));
|
|
|
|
}
|
|
|
|
}
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
SI->addCase(Cst, NewBB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
// NewBB branches to the phi block, add the uncond branch and the phi entry.
|
2011-05-19 02:28:48 +08:00
|
|
|
Builder.SetInsertPoint(NewBB);
|
|
|
|
Builder.SetCurrentDebugLocation(SI->getDebugLoc());
|
|
|
|
Builder.CreateBr(SuccBlock);
|
fix a fairly serious oversight with switch formation from
or'd conditions. Previously we'd compile something like this:
int crud (unsigned char c) {
return c == 62 || c == 34 || c == 92;
}
into:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
]
lor.rhs: ; preds = %entry
%cmp8 = icmp eq i8 %c, 92
br label %lor.end
lor.end: ; preds = %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ %cmp8, %lor.rhs ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which failed to merge the compare-with-92 into the switch. With this patch
we simplify this all the way to:
switch i8 %c, label %lor.rhs [
i8 62, label %lor.end
i8 34, label %lor.end
i8 92, label %lor.end
]
lor.rhs: ; preds = %entry
br label %lor.end
lor.end: ; preds = %entry, %entry, %entry, %lor.rhs
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
which is much better for codegen's switch lowering stuff. This kicks in 33 times
on 176.gcc (for example) cutting 103 instructions off the generated code.
llvm-svn: 121671
2010-12-13 11:18:54 +08:00
|
|
|
PHIUse->addIncoming(NewCst, NewBB);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// The specified branch is a conditional branch.
|
2010-12-13 13:03:41 +08:00
|
|
|
/// Check to see if it is branching on an or/and chain of icmp instructions, and
|
|
|
|
/// fold it into a switch instruction if so.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
|
|
|
|
const DataLayout &DL) {
|
2010-12-13 13:03:41 +08:00
|
|
|
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!Cond) return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// Change br (X == 0 | X == 1), T, F into a switch instruction.
|
|
|
|
// If this is a bunch of seteq's or'd together, or if it's a bunch of
|
|
|
|
// 'setne's and'ed together, collect them.
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2014-11-20 04:09:11 +08:00
|
|
|
// Try to gather values from a chain of and/or to be turned into a switch
|
2014-11-21 06:40:25 +08:00
|
|
|
ConstantComparesGatherer ConstantCompare(Cond, DL);
|
|
|
|
// Unpack the result
|
|
|
|
SmallVectorImpl<ConstantInt*> &Values = ConstantCompare.Vals;
|
|
|
|
Value *CompVal = ConstantCompare.CompValue;
|
|
|
|
unsigned UsedICmps = ConstantCompare.UsedICmps;
|
|
|
|
Value *ExtraCase = ConstantCompare.Extra;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// If we didn't have a multiply compared value, fail.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!CompVal) return false;
|
2010-12-13 13:03:41 +08:00
|
|
|
|
2011-02-08 06:37:28 +08:00
|
|
|
// Avoid turning single icmps into a switch.
|
|
|
|
if (UsedICmps <= 1)
|
|
|
|
return false;
|
|
|
|
|
2014-11-21 06:40:25 +08:00
|
|
|
bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);
|
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// There might be duplicate constants in the list, which the switch
|
|
|
|
// instruction can't handle, remove them now.
|
|
|
|
array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
|
|
|
|
Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// If Extra was used, we require at least two switch values to do the
|
|
|
|
// transformation. A switch with one value is just an cond branch.
|
|
|
|
if (ExtraCase && Values.size() < 2) return false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2012-08-30 05:46:38 +08:00
|
|
|
// TODO: Preserve branch weight metadata, similarly to how
|
|
|
|
// FoldValueComparisonIntoPredecessors preserves it.
|
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// Figure out which block is which destination.
|
|
|
|
BasicBlock *DefaultBB = BI->getSuccessor(1);
|
|
|
|
BasicBlock *EdgeBB = BI->getSuccessor(0);
|
|
|
|
if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
BasicBlock *BB = BI->getParent();
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 14:17:25 +08:00
|
|
|
DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
|
2010-12-14 13:57:30 +08:00
|
|
|
<< " cases into SWITCH. BB is:\n" << *BB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// If there are any extra values that couldn't be folded into the switch
|
|
|
|
// then we evaluate them with an explicit branch first. Split the block
|
|
|
|
// right before the condbr to handle it.
|
|
|
|
if (ExtraCase) {
|
|
|
|
BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
|
|
|
|
// Remove the uncond branch added to the old block.
|
|
|
|
TerminatorInst *OldTI = BB->getTerminator();
|
2011-05-19 07:18:47 +08:00
|
|
|
Builder.SetInsertPoint(OldTI);
|
|
|
|
|
2010-12-14 13:57:30 +08:00
|
|
|
if (TrueWhenEqual)
|
2011-05-19 07:18:47 +08:00
|
|
|
Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
|
2010-12-14 13:57:30 +08:00
|
|
|
else
|
2011-05-19 07:18:47 +08:00
|
|
|
Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
OldTI->eraseFromParent();
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:34:18 +08:00
|
|
|
// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
|
|
|
|
// for the edge we just added.
|
2010-12-14 15:09:42 +08:00
|
|
|
AddPredecessorToBlock(EdgeBB, BB, NewBB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 14:17:25 +08:00
|
|
|
DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
|
|
|
|
<< "\nEXTRABB = " << *BB);
|
2010-12-13 13:03:41 +08:00
|
|
|
BB = NewBB;
|
|
|
|
}
|
2011-05-19 07:18:47 +08:00
|
|
|
|
|
|
|
Builder.SetInsertPoint(BI);
|
2010-12-13 13:03:41 +08:00
|
|
|
// Convert pointer to int before we switch.
|
|
|
|
if (CompVal->getType()->isPointerTy()) {
|
2015-03-10 10:37:25 +08:00
|
|
|
CompVal = Builder.CreatePtrToInt(
|
|
|
|
CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
|
2010-12-13 13:03:41 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// Create the new switch instruction now.
|
2011-05-19 07:18:47 +08:00
|
|
|
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
|
2011-05-18 07:29:05 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// Add all of the 'cases' to the switch instruction.
|
|
|
|
for (unsigned i = 0, e = Values.size(); i != e; ++i)
|
|
|
|
New->addCase(Values[i], EdgeBB);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// We added edges from PI to the EdgeBB. As such, if there were any
|
|
|
|
// PHI nodes in EdgeBB, they need entries to be added corresponding to
|
|
|
|
// the number of edges added.
|
|
|
|
for (BasicBlock::iterator BBI = EdgeBB->begin();
|
|
|
|
isa<PHINode>(BBI); ++BBI) {
|
|
|
|
PHINode *PN = cast<PHINode>(BBI);
|
|
|
|
Value *InVal = PN->getIncomingValueForBlock(BB);
|
|
|
|
for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
|
|
|
|
PN->addIncoming(InVal, BB);
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 13:03:41 +08:00
|
|
|
// Erase the old branch instruction.
|
|
|
|
EraseTerminatorInstAndDCECond(BI);
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-14 14:17:25 +08:00
|
|
|
DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
|
2010-12-13 13:03:41 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-09-05 20:57:57 +08:00
|
|
|
bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
  // A landing pad that does nothing but re-raise the exception it caught is
  // trivial: remove it and turn the invokes that unwind to it into calls.
  BasicBlock *BB = RI->getParent();
  LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());

  // Reject blocks that are not landing pads (LPInst is null) and resumes that
  // unwind something other than the exception that brought control here.
  if (RI->getValue() != LPInst)
    return false;

  // Only debug intrinsics may sit between the landingpad and the resume.
  BasicBlock::iterator It = LPInst;
  BasicBlock::iterator End = RI;
  for (++It; It != End; ++It) {
    if (!isa<DbgInfoIntrinsic>(It))
      return false;
  }

  // Rewrite every invoke that unwinds here as a call followed by a branch.
  pred_iterator PI = pred_begin(BB);
  pred_iterator PE = pred_end(BB);
  while (PI != PE) {
    // Step the iterator first: the invoke it refers to is erased below.
    BasicBlock *PredBB = *PI;
    ++PI;
    InvokeInst *II = cast<InvokeInst>(PredBB->getTerminator());

    // The call takes all of the invoke's operands except the last three.
    SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);

    // Create the call immediately before the invoke and copy over its name,
    // calling convention, attributes and debug location.
    CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
    Call->takeName(II);
    Call->setCallingConv(II->getCallingConv());
    Call->setAttributes(II->getAttributes());
    Call->setDebugLoc(II->getDebugLoc());

    // Redirect users of the invoke's result to the call.  This is done even
    // for void functions and unused results so that the callgraph edge is
    // updated.
    II->replaceAllUsesWith(Call);
    BB->removePredecessor(II->getParent());

    // Branch to the invoke's normal destination, then drop the invoke.
    BranchInst::Create(II->getNormalDest(), II);
    II->eraseFromParent();
  }

  // Nothing reaches the landing pad any more, so delete it.
  BB->eraseFromParent();
  return true;
}
|
|
|
|
|
2011-05-19 05:33:11 +08:00
|
|
|
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
  BasicBlock *BB = RI->getParent();

  // Only handle blocks whose first instruction after PHIs and debug
  // intrinsics is already the terminator.
  if (!BB->getFirstNonPHIOrDbg()->isTerminator())
    return false;

  // Sort the branch-terminated predecessors into unconditional ones (kept as
  // blocks) and conditional ones (kept as the branch instructions).
  SmallVector<BasicBlock*, 8> UncondBranchPreds;
  SmallVector<BranchInst*, 8> CondBranchPreds;
  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
    BasicBlock *PredBB = *PI;
    BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (!PredBr)
      continue;
    if (PredBr->isUnconditional())
      UncondBranchPreds.push_back(PredBB);
    else
      CondBranchPreds.push_back(PredBr);
  }

  // When duplicating returns is enabled, fold the return into each
  // unconditional-branch predecessor.
  if (!UncondBranchPreds.empty() && DupRet) {
    do {
      BasicBlock *Pred = UncondBranchPreds.pop_back_val();
      DEBUG(dbgs() << "FOLDING: " << *BB
                   << "INTO UNCOND BRANCH PRED: " << *Pred);
      (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
    } while (!UncondBranchPreds.empty());

    // If no predecessors remain, delete the block.  It ends in a return, so
    // it has no successors to update.
    if (pred_empty(BB))
      BB->eraseFromParent();

    return true;
  }

  // Otherwise look at the conditional branches that reach this return.  If
  // both successors of one are return blocks, try to turn that branch into a
  // select/return pair.
  while (!CondBranchPreds.empty()) {
    BranchInst *CondBr = CondBranchPreds.pop_back_val();

    if (!isa<ReturnInst>(CondBr->getSuccessor(0)->getTerminator()))
      continue;
    if (!isa<ReturnInst>(CondBr->getSuccessor(1)->getTerminator()))
      continue;
    if (SimplifyCondBranchToTwoReturns(CondBr, Builder))
      return true;
  }
  return false;
}
|
2002-05-22 04:50:24 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
|
|
|
|
BasicBlock *BB = UI->getParent();
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
bool Changed = false;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// If there are any instructions immediately before the unreachable that can
|
|
|
|
// be removed, do so.
|
|
|
|
while (UI != BB->begin()) {
|
|
|
|
BasicBlock::iterator BBI = UI;
|
|
|
|
--BBI;
|
2011-08-16 07:59:28 +08:00
|
|
|
// Do not delete instructions that can have side effects which might cause
|
|
|
|
// the unreachable to not be reachable; specifically, calls and volatile
|
|
|
|
// operations may have this effect.
|
2010-12-13 14:25:44 +08:00
|
|
|
if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
|
2011-08-16 07:59:28 +08:00
|
|
|
|
|
|
|
if (BBI->mayHaveSideEffects()) {
|
|
|
|
if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
|
|
|
|
if (SI->isVolatile())
|
|
|
|
break;
|
|
|
|
} else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
|
|
|
|
if (LI->isVolatile())
|
|
|
|
break;
|
|
|
|
} else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
|
|
|
|
if (RMWI->isVolatile())
|
|
|
|
break;
|
|
|
|
} else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
|
|
|
|
if (CXI->isVolatile())
|
|
|
|
break;
|
|
|
|
} else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
|
|
|
|
!isa<LandingPadInst>(BBI)) {
|
2010-12-13 14:25:44 +08:00
|
|
|
break;
|
2011-08-16 07:59:28 +08:00
|
|
|
}
|
2011-08-17 04:41:17 +08:00
|
|
|
// Note that deleting LandingPad's here is in fact okay, although it
|
|
|
|
// involves a bit of subtle reasoning. If this inst is a LandingPad,
|
|
|
|
// all the predecessors of this block will be the unwind edges of Invokes,
|
|
|
|
// and we can therefore guarantee this block will be erased.
|
2011-08-16 07:59:28 +08:00
|
|
|
}
|
|
|
|
|
2011-03-09 08:48:33 +08:00
|
|
|
// Delete this instruction (any uses are guaranteed to be dead)
|
|
|
|
if (!BBI->use_empty())
|
|
|
|
BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
|
2010-12-14 14:17:25 +08:00
|
|
|
BBI->eraseFromParent();
|
2010-12-13 14:25:44 +08:00
|
|
|
Changed = true;
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// If the unreachable instruction is the first in the block, take a gander
|
|
|
|
// at all of the predecessors of this instruction, and simplify them.
|
|
|
|
if (&BB->front() != UI) return Changed;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
|
|
|
|
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
|
|
|
|
TerminatorInst *TI = Preds[i]->getTerminator();
|
2011-05-19 08:09:21 +08:00
|
|
|
IRBuilder<> Builder(TI);
|
2010-12-13 14:25:44 +08:00
|
|
|
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
|
|
|
|
if (BI->isUnconditional()) {
|
|
|
|
if (BI->getSuccessor(0) == BB) {
|
|
|
|
new UnreachableInst(TI->getContext(), TI);
|
|
|
|
TI->eraseFromParent();
|
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (BI->getSuccessor(0) == BB) {
|
2011-05-19 08:09:21 +08:00
|
|
|
Builder.CreateBr(BI->getSuccessor(1));
|
2010-12-13 14:25:44 +08:00
|
|
|
EraseTerminatorInstAndDCECond(BI);
|
|
|
|
} else if (BI->getSuccessor(1) == BB) {
|
2011-05-19 08:09:21 +08:00
|
|
|
Builder.CreateBr(BI->getSuccessor(0));
|
2010-12-13 14:25:44 +08:00
|
|
|
EraseTerminatorInstAndDCECond(BI);
|
|
|
|
Changed = true;
|
2008-02-20 19:26:25 +08:00
|
|
|
}
|
2004-02-16 14:35:48 +08:00
|
|
|
}
|
2010-12-13 14:25:44 +08:00
|
|
|
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
|
2012-03-11 14:09:17 +08:00
|
|
|
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
|
2012-03-08 15:06:20 +08:00
|
|
|
i != e; ++i)
|
|
|
|
if (i.getCaseSuccessor() == BB) {
|
2010-12-13 14:25:44 +08:00
|
|
|
BB->removePredecessor(SI->getParent());
|
|
|
|
SI->removeCase(i);
|
|
|
|
--i; --e;
|
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
} else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
|
|
|
|
if (II->getUnwindDest() == BB) {
|
|
|
|
// Convert the invoke to a call instruction. This would be a good
|
|
|
|
// place to note that the call does not throw though.
|
2011-05-19 08:09:21 +08:00
|
|
|
BranchInst *BI = Builder.CreateBr(II->getNormalDest());
|
2010-12-13 14:25:44 +08:00
|
|
|
II->removeFromParent(); // Take out of symbol table
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// Insert the call now...
|
|
|
|
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
|
2011-05-19 08:09:21 +08:00
|
|
|
Builder.SetInsertPoint(BI);
|
|
|
|
CallInst *CI = Builder.CreateCall(II->getCalledValue(),
|
2011-07-15 16:37:34 +08:00
|
|
|
Args, II->getName());
|
2010-12-13 09:47:07 +08:00
|
|
|
CI->setCallingConv(II->getCallingConv());
|
|
|
|
CI->setAttributes(II->getAttributes());
|
2010-12-13 14:25:44 +08:00
|
|
|
// If the invoke produced a value, the call does now instead.
|
2010-12-13 09:47:07 +08:00
|
|
|
II->replaceAllUsesWith(CI);
|
|
|
|
delete II;
|
|
|
|
Changed = true;
|
|
|
|
}
|
2004-02-24 13:54:22 +08:00
|
|
|
}
|
2010-12-13 14:25:44 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// If this block is now dead, remove it.
|
2015-01-13 11:46:47 +08:00
|
|
|
if (pred_empty(BB) &&
|
2010-12-13 14:25:44 +08:00
|
|
|
BB != &BB->getParent()->getEntryBlock()) {
|
|
|
|
// We know there are no successors, so just nuke the block.
|
|
|
|
BB->eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2004-02-25 00:09:21 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
return Changed;
|
|
|
|
}
|
2004-02-25 00:09:21 +08:00
|
|
|
|
2015-01-27 03:52:32 +08:00
|
|
|
/// Sort \p Cases in place with ConstantIntSortPredicate and return true when
/// every element of the sorted list equals the element after it plus one,
/// i.e. the values form a single gap-free run. \p Cases must not be empty.
static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
  assert(Cases.size() >= 1);

  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);

  // Compare each adjacent pair of the sorted list; any pair that is not
  // exactly one apart breaks the run.
  const size_t NumCases = Cases.size();
  size_t Pos = 1;
  while (Pos != NumCases) {
    const auto &Earlier = Cases[Pos - 1]->getValue();
    const auto &Later = Cases[Pos]->getValue();
    if (Earlier != Later + 1)
      return false;
    ++Pos;
  }
  return true;
}
|
|
|
|
|
|
|
|
/// Turn a switch with two reachable destinations into an integer range
|
|
|
|
/// comparison and branch.
|
|
|
|
static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
|
|
|
|
assert(SI->getNumCases() > 1 && "Degenerate switch?");
|
2011-02-02 23:56:22 +08:00
|
|
|
|
2015-01-27 03:52:32 +08:00
|
|
|
bool HasDefault =
|
|
|
|
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
|
|
|
|
|
|
|
|
// Partition the cases into two sets with different destinations.
|
|
|
|
BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
|
|
|
|
BasicBlock *DestB = nullptr;
|
|
|
|
SmallVector <ConstantInt *, 16> CasesA;
|
|
|
|
SmallVector <ConstantInt *, 16> CasesB;
|
|
|
|
|
|
|
|
for (SwitchInst::CaseIt I : SI->cases()) {
|
|
|
|
BasicBlock *Dest = I.getCaseSuccessor();
|
|
|
|
if (!DestA) DestA = Dest;
|
|
|
|
if (Dest == DestA) {
|
|
|
|
CasesA.push_back(I.getCaseValue());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!DestB) DestB = Dest;
|
|
|
|
if (Dest == DestB) {
|
|
|
|
CasesB.push_back(I.getCaseValue());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
return false; // More than two destinations.
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(DestA && DestB && "Single-destination switch should have been folded.");
|
|
|
|
assert(DestA != DestB);
|
|
|
|
assert(DestB != SI->getDefaultDest());
|
|
|
|
assert(!CasesB.empty() && "There must be non-default cases.");
|
|
|
|
assert(!CasesA.empty() || HasDefault);
|
|
|
|
|
|
|
|
// Figure out if one of the sets of cases form a contiguous range.
|
|
|
|
SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
|
|
|
|
BasicBlock *ContiguousDest = nullptr;
|
|
|
|
BasicBlock *OtherDest = nullptr;
|
|
|
|
if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
|
|
|
|
ContiguousCases = &CasesA;
|
|
|
|
ContiguousDest = DestA;
|
|
|
|
OtherDest = DestB;
|
|
|
|
} else if (CasesAreContiguous(CasesB)) {
|
|
|
|
ContiguousCases = &CasesB;
|
|
|
|
ContiguousDest = DestB;
|
|
|
|
OtherDest = DestA;
|
|
|
|
} else
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Start building the compare and branch.
|
|
|
|
|
|
|
|
Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
|
|
|
|
Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size());
|
2011-02-02 23:56:22 +08:00
|
|
|
|
2011-02-08 06:37:28 +08:00
|
|
|
Value *Sub = SI->getCondition();
|
|
|
|
if (!Offset->isNullValue())
|
2015-01-27 03:52:32 +08:00
|
|
|
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
|
|
|
|
|
2013-04-16 16:35:36 +08:00
|
|
|
Value *Cmp;
|
|
|
|
// If NumCases overflowed, then all possible values jump to the successor.
|
2015-01-27 03:52:32 +08:00
|
|
|
if (NumCases->isNullValue() && !ContiguousCases->empty())
|
2013-04-16 16:35:36 +08:00
|
|
|
Cmp = ConstantInt::getTrue(SI->getContext());
|
|
|
|
else
|
|
|
|
Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
|
2015-01-27 03:52:32 +08:00
|
|
|
BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
|
2011-02-02 23:56:22 +08:00
|
|
|
|
2012-09-18 08:47:33 +08:00
|
|
|
// Update weight for the newly-created conditional branch.
|
2015-01-27 03:52:32 +08:00
|
|
|
if (HasBranchWeights(SI)) {
|
|
|
|
SmallVector<uint64_t, 8> Weights;
|
2012-09-18 08:47:33 +08:00
|
|
|
GetBranchWeights(SI, Weights);
|
|
|
|
if (Weights.size() == 1 + SI->getNumCases()) {
|
2015-01-27 03:52:32 +08:00
|
|
|
uint64_t TrueWeight = 0;
|
|
|
|
uint64_t FalseWeight = 0;
|
|
|
|
for (size_t I = 0, E = Weights.size(); I != E; ++I) {
|
|
|
|
if (SI->getSuccessor(I) == ContiguousDest)
|
|
|
|
TrueWeight += Weights[I];
|
|
|
|
else
|
|
|
|
FalseWeight += Weights[I];
|
|
|
|
}
|
|
|
|
while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
|
|
|
|
TrueWeight /= 2;
|
|
|
|
FalseWeight /= 2;
|
|
|
|
}
|
2012-09-18 08:47:33 +08:00
|
|
|
NewBI->setMetadata(LLVMContext::MD_prof,
|
2015-01-27 03:52:32 +08:00
|
|
|
MDBuilder(SI->getContext()).createBranchWeights(
|
|
|
|
(uint32_t)TrueWeight, (uint32_t)FalseWeight));
|
2012-09-18 08:47:33 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-27 03:52:32 +08:00
|
|
|
// Prune obsolete incoming values off the successors' PHI nodes.
|
|
|
|
for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
|
|
|
|
unsigned PreviousEdges = ContiguousCases->size();
|
|
|
|
if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges;
|
|
|
|
for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
|
2011-02-02 23:56:22 +08:00
|
|
|
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
|
|
|
|
}
|
2015-01-27 03:52:32 +08:00
|
|
|
for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
|
|
|
|
unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
|
|
|
|
if (OtherDest == SI->getDefaultDest()) ++PreviousEdges;
|
|
|
|
for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
|
|
|
|
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Drop the switch.
|
2011-02-02 23:56:22 +08:00
|
|
|
SI->eraseFromParent();
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2009-02-05 05:39:48 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Compute masked bits for the condition of a switch
|
2011-05-14 23:57:25 +08:00
|
|
|
/// and use it to remove dead cases.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
|
|
|
|
const DataLayout &DL) {
|
2011-05-14 23:57:25 +08:00
|
|
|
Value *Cond = SI->getCondition();
|
2013-09-06 08:37:24 +08:00
|
|
|
unsigned Bits = Cond->getType()->getIntegerBitWidth();
|
2011-05-14 23:57:25 +08:00
|
|
|
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
|
2015-01-04 20:03:27 +08:00
|
|
|
computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
|
2011-05-14 23:57:25 +08:00
|
|
|
|
|
|
|
// Gather dead cases.
|
|
|
|
SmallVector<ConstantInt*, 8> DeadCases;
|
2012-03-11 14:09:17 +08:00
|
|
|
for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
|
2012-03-08 15:06:20 +08:00
|
|
|
if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
|
|
|
|
(I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
|
|
|
|
DeadCases.push_back(I.getCaseValue());
|
2011-05-14 23:57:25 +08:00
|
|
|
DEBUG(dbgs() << "SimplifyCFG: switch case '"
|
2012-03-08 15:06:20 +08:00
|
|
|
<< I.getCaseValue() << "' is dead.\n");
|
2011-05-14 23:57:25 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-18 08:47:33 +08:00
|
|
|
SmallVector<uint64_t, 8> Weights;
|
|
|
|
bool HasWeight = HasBranchWeights(SI);
|
|
|
|
if (HasWeight) {
|
|
|
|
GetBranchWeights(SI, Weights);
|
|
|
|
HasWeight = (Weights.size() == 1 + SI->getNumCases());
|
|
|
|
}
|
|
|
|
|
2011-05-14 23:57:25 +08:00
|
|
|
// Remove dead cases from the switch.
|
|
|
|
for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
|
2012-03-08 15:06:20 +08:00
|
|
|
SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
|
2012-03-11 14:09:17 +08:00
|
|
|
assert(Case != SI->case_default() &&
|
SwitchInst refactoring.
The purpose of refactoring is to hide operand roles from SwitchInst user (programmer). If you want to play with operands directly, probably you will need lower level methods than SwitchInst ones (TerminatorInst or may be User). After this patch we can reorganize SwitchInst operands and successors as we want.
What was done:
1. Changed semantics of index inside the getCaseValue method:
getCaseValue(0) means "get first case", not a condition. Use getCondition() if you want to resolve the condition. I propose don't mix SwitchInst case indexing with low level indexing (TI successors indexing, User's operands indexing), since it may be dangerous.
2. By the same reason findCaseValue(ConstantInt*) returns actual number of case value. 0 means first case, not default. If there is no case with given value, ErrorIndex will returned.
3. Added getCaseSuccessor method. I propose to avoid usage of TerminatorInst::getSuccessor if you want to resolve case successor BB. Use getCaseSuccessor instead, since internal SwitchInst organization of operands/successors is hidden and may be changed in any moment.
4. Added resolveSuccessorIndex and resolveCaseIndex. The main purpose of these methods is to see how case successors are really mapped in TerminatorInst.
4.1 "resolveSuccessorIndex" was created if you need to level down from SwitchInst to TerminatorInst. It returns TerminatorInst's successor index for given case successor.
4.2 "resolveCaseIndex" converts low level successors index to case index that curresponds to the given successor.
Note: There are also related compatability fix patches for dragonegg, klee, llvm-gcc-4.0, llvm-gcc-4.2, safecode, clang.
llvm-svn: 149481
2012-02-01 15:49:51 +08:00
|
|
|
"Case was not found. Probably mistake in DeadCases forming.");
|
2012-09-18 08:47:33 +08:00
|
|
|
if (HasWeight) {
|
|
|
|
std::swap(Weights[Case.getCaseIndex()+1], Weights.back());
|
|
|
|
Weights.pop_back();
|
|
|
|
}
|
|
|
|
|
2011-05-14 23:57:25 +08:00
|
|
|
// Prune unused values from PHI nodes.
|
2012-03-08 15:06:20 +08:00
|
|
|
Case.getCaseSuccessor()->removePredecessor(SI->getParent());
|
2011-05-14 23:57:25 +08:00
|
|
|
SI->removeCase(Case);
|
|
|
|
}
|
2013-12-20 16:21:30 +08:00
|
|
|
if (HasWeight && Weights.size() >= 2) {
|
2012-09-18 08:47:33 +08:00
|
|
|
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
|
|
|
|
SI->setMetadata(LLVMContext::MD_prof,
|
|
|
|
MDBuilder(SI->getParent()->getContext()).
|
|
|
|
createBranchWeights(MDWeights));
|
|
|
|
}
|
2011-05-14 23:57:25 +08:00
|
|
|
|
|
|
|
return !DeadCases.empty();
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If BB would be eligible for simplification by
|
|
|
|
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
|
2011-06-18 18:28:47 +08:00
|
|
|
/// by an unconditional branch), look at the phi node for BB in the successor
|
|
|
|
/// block and see if the incoming value is equal to CaseValue. If so, return
|
|
|
|
/// the phi node, and set PhiIndex to BB's index in the phi node.
|
|
|
|
static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
|
|
|
|
BasicBlock *BB,
|
|
|
|
int *PhiIndex) {
|
|
|
|
if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr; // BB must be empty to be a candidate for simplification.
|
2011-06-18 18:28:47 +08:00
|
|
|
if (!BB->getSinglePredecessor())
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr; // BB must be dominated by the switch.
|
2011-06-18 18:28:47 +08:00
|
|
|
|
|
|
|
BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
|
|
|
|
if (!Branch || !Branch->isUnconditional())
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr; // Terminator must be unconditional branch.
|
2011-06-18 18:28:47 +08:00
|
|
|
|
|
|
|
BasicBlock *Succ = Branch->getSuccessor(0);
|
|
|
|
|
|
|
|
BasicBlock::iterator I = Succ->begin();
|
|
|
|
while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
|
|
|
|
int Idx = PHI->getBasicBlockIndex(BB);
|
|
|
|
assert(Idx >= 0 && "PHI has no entry for predecessor?");
|
|
|
|
|
|
|
|
Value *InValue = PHI->getIncomingValue(Idx);
|
|
|
|
if (InValue != CaseValue) continue;
|
|
|
|
|
|
|
|
*PhiIndex = Idx;
|
|
|
|
return PHI;
|
|
|
|
}
|
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-06-18 18:28:47 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Try to forward the condition of a switch instruction to a phi node
|
|
|
|
/// dominated by the switch, if that would mean that some of the destination
|
|
|
|
/// blocks of the switch can be folded away.
|
2011-06-18 18:28:47 +08:00
|
|
|
/// Returns true if a change is made.
|
|
|
|
static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
|
|
|
|
typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
|
|
|
|
ForwardingNodesMap ForwardingNodes;
|
|
|
|
|
2012-03-11 14:09:17 +08:00
|
|
|
for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
|
2012-03-08 15:06:20 +08:00
|
|
|
ConstantInt *CaseValue = I.getCaseValue();
|
|
|
|
BasicBlock *CaseDest = I.getCaseSuccessor();
|
2011-06-18 18:28:47 +08:00
|
|
|
|
|
|
|
int PhiIndex;
|
|
|
|
PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
|
|
|
|
&PhiIndex);
|
|
|
|
if (!PHI) continue;
|
|
|
|
|
|
|
|
ForwardingNodes[PHI].push_back(PhiIndex);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Changed = false;
|
|
|
|
|
|
|
|
for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
|
|
|
|
E = ForwardingNodes.end(); I != E; ++I) {
|
|
|
|
PHINode *Phi = I->first;
|
2013-07-14 12:42:23 +08:00
|
|
|
SmallVectorImpl<int> &Indexes = I->second;
|
2011-06-18 18:28:47 +08:00
|
|
|
|
|
|
|
if (Indexes.size() < 2) continue;
|
|
|
|
|
|
|
|
for (size_t I = 0, E = Indexes.size(); I != E; ++I)
|
|
|
|
Phi->setIncomingValue(Indexes[I], SI->getCondition());
|
|
|
|
Changed = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Changed;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if the backend will be able to handle
|
2012-09-06 17:43:28 +08:00
|
|
|
/// initializing an array of constants like C.
|
2012-09-07 16:22:57 +08:00
|
|
|
static bool ValidLookupTableConstant(Constant *C) {
|
2014-06-20 08:38:12 +08:00
|
|
|
if (C->isThreadDependent())
|
|
|
|
return false;
|
|
|
|
if (C->isDLLImportDependent())
|
|
|
|
return false;
|
2012-09-06 17:43:28 +08:00
|
|
|
|
2014-06-26 08:30:52 +08:00
|
|
|
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
|
|
|
|
return CE->isGEPWithNoNotionalOverIndexing();
|
|
|
|
|
2012-09-06 17:43:28 +08:00
|
|
|
return isa<ConstantFP>(C) ||
|
|
|
|
isa<ConstantInt>(C) ||
|
|
|
|
isa<ConstantPointerNull>(C) ||
|
|
|
|
isa<GlobalValue>(C) ||
|
|
|
|
isa<UndefValue>(C);
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If V is a Constant, return it. Otherwise, try to look up
|
2012-10-31 23:31:09 +08:00
|
|
|
/// its constant value in ConstantPool, returning 0 if it's not there.
|
2012-10-31 23:14:39 +08:00
|
|
|
static Constant *LookupConstant(Value *V,
|
|
|
|
const SmallDenseMap<Value*, Constant*>& ConstantPool) {
|
|
|
|
if (Constant *C = dyn_cast<Constant>(V))
|
|
|
|
return C;
|
|
|
|
return ConstantPool.lookup(V);
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Try to fold instruction I into a constant. This works for
|
2012-10-31 21:42:45 +08:00
|
|
|
/// simple instructions such as binary operations where both operands are
|
|
|
|
/// constant or can be replaced by constants from the ConstantPool. Returns the
|
2012-10-31 23:31:09 +08:00
|
|
|
/// resulting constant on success, 0 otherwise.
|
2013-11-12 20:24:36 +08:00
|
|
|
static Constant *
|
2015-03-10 10:37:25 +08:00
|
|
|
ConstantFold(Instruction *I, const DataLayout &DL,
|
|
|
|
const SmallDenseMap<Value *, Constant *> &ConstantPool) {
|
2012-10-31 21:42:45 +08:00
|
|
|
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
|
2012-10-31 23:14:39 +08:00
|
|
|
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
|
|
|
|
if (!A)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2012-10-31 23:14:39 +08:00
|
|
|
if (A->isAllOnesValue())
|
|
|
|
return LookupConstant(Select->getTrueValue(), ConstantPool);
|
|
|
|
if (A->isNullValue())
|
|
|
|
return LookupConstant(Select->getFalseValue(), ConstantPool);
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2012-10-31 21:42:45 +08:00
|
|
|
}
|
|
|
|
|
2013-11-12 20:24:36 +08:00
|
|
|
SmallVector<Constant *, 4> COps;
|
|
|
|
for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
|
|
|
|
if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
|
|
|
|
COps.push_back(A);
|
|
|
|
else
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2012-10-31 21:42:45 +08:00
|
|
|
}
|
|
|
|
|
2015-03-10 10:37:25 +08:00
|
|
|
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
|
2013-11-12 20:24:36 +08:00
|
|
|
return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
|
|
|
|
COps[1], DL);
|
2015-03-10 10:37:25 +08:00
|
|
|
}
|
2013-11-12 20:24:36 +08:00
|
|
|
|
|
|
|
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
|
2012-10-31 21:42:45 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Try to determine the resulting constant values in phi nodes
|
2012-10-31 21:42:45 +08:00
|
|
|
/// at the common destination basic block, *CommonDest, for one of the case
|
2012-10-31 23:31:09 +08:00
|
|
|
/// destionations CaseDest corresponding to value CaseVal (0 for the default
|
2012-10-31 21:42:45 +08:00
|
|
|
/// case), of a switch instruction SI.
|
2013-07-14 12:42:23 +08:00
|
|
|
static bool
|
2015-03-10 10:37:25 +08:00
|
|
|
GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
|
2013-07-14 12:42:23 +08:00
|
|
|
BasicBlock **CommonDest,
|
2015-03-10 10:37:25 +08:00
|
|
|
SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
|
|
|
|
const DataLayout &DL) {
|
2012-09-06 17:43:28 +08:00
|
|
|
// The block from which we enter the common destination.
|
|
|
|
BasicBlock *Pred = SI->getParent();
|
|
|
|
|
2012-10-31 21:42:45 +08:00
|
|
|
// If CaseDest is empty except for some side-effect free instructions through
|
|
|
|
// which we can constant-propagate the CaseVal, continue to its successor.
|
|
|
|
SmallDenseMap<Value*, Constant*> ConstantPool;
|
|
|
|
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
|
|
|
|
for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E;
|
|
|
|
++I) {
|
|
|
|
if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
|
|
|
|
// If the terminator is a simple branch, continue to the next block.
|
|
|
|
if (T->getNumSuccessors() != 1)
|
|
|
|
return false;
|
|
|
|
Pred = CaseDest;
|
|
|
|
CaseDest = T->getSuccessor(0);
|
|
|
|
} else if (isa<DbgInfoIntrinsic>(I)) {
|
|
|
|
// Skip debug intrinsic.
|
|
|
|
continue;
|
2015-03-10 10:37:25 +08:00
|
|
|
} else if (Constant *C = ConstantFold(I, DL, ConstantPool)) {
|
2012-10-31 21:42:45 +08:00
|
|
|
// Instruction is side-effect free and constant.
|
2015-01-10 06:13:31 +08:00
|
|
|
|
|
|
|
// If the instruction has uses outside this block or a phi node slot for
|
|
|
|
// the block, it is not safe to bypass the instruction since it would then
|
|
|
|
// no longer dominate all its uses.
|
|
|
|
for (auto &Use : I->uses()) {
|
|
|
|
User *User = Use.getUser();
|
|
|
|
if (Instruction *I = dyn_cast<Instruction>(User))
|
|
|
|
if (I->getParent() == CaseDest)
|
|
|
|
continue;
|
|
|
|
if (PHINode *Phi = dyn_cast<PHINode>(User))
|
|
|
|
if (Phi->getIncomingBlock(Use) == CaseDest)
|
|
|
|
continue;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-10-31 21:42:45 +08:00
|
|
|
ConstantPool.insert(std::make_pair(I, C));
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
2012-09-06 17:43:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// If we did not have a CommonDest before, use the current one.
|
|
|
|
if (!*CommonDest)
|
|
|
|
*CommonDest = CaseDest;
|
|
|
|
// If the destination isn't the common one, abort.
|
|
|
|
if (CaseDest != *CommonDest)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Get the values for this case from phi nodes in the destination block.
|
|
|
|
BasicBlock::iterator I = (*CommonDest)->begin();
|
|
|
|
while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
|
|
|
|
int Idx = PHI->getBasicBlockIndex(Pred);
|
|
|
|
if (Idx == -1)
|
|
|
|
continue;
|
|
|
|
|
2012-10-31 23:14:39 +08:00
|
|
|
Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx),
|
|
|
|
ConstantPool);
|
2012-09-06 17:43:28 +08:00
|
|
|
if (!ConstVal)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Be conservative about which kinds of constants we support.
|
|
|
|
if (!ValidLookupTableConstant(ConstVal))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Res.push_back(std::make_pair(PHI, ConstVal));
|
|
|
|
}
|
|
|
|
|
2014-01-12 08:44:41 +08:00
|
|
|
return Res.size() > 0;
|
2012-09-06 17:43:28 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// Helper function used to add CaseVal to the list of cases that generate
|
|
|
|
// Result.
|
2014-10-14 09:58:26 +08:00
|
|
|
static void MapCaseToResult(ConstantInt *CaseVal,
|
|
|
|
SwitchCaseResultVectorTy &UniqueResults,
|
|
|
|
Constant *Result) {
|
|
|
|
for (auto &I : UniqueResults) {
|
|
|
|
if (I.first == Result) {
|
|
|
|
I.second.push_back(CaseVal);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
UniqueResults.push_back(std::make_pair(Result,
|
|
|
|
SmallVector<ConstantInt*, 4>(1, CaseVal)));
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// Helper function that initializes a map containing
|
2014-10-14 09:58:26 +08:00
|
|
|
// results for the PHI node of the common destination block for a switch
|
|
|
|
// instruction. Returns false if multiple PHI nodes have been found or if
|
|
|
|
// there is not a common destination block for the switch.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
|
|
|
|
BasicBlock *&CommonDest,
|
|
|
|
SwitchCaseResultVectorTy &UniqueResults,
|
|
|
|
Constant *&DefaultResult,
|
|
|
|
const DataLayout &DL) {
|
2014-10-14 09:58:26 +08:00
|
|
|
for (auto &I : SI->cases()) {
|
|
|
|
ConstantInt *CaseVal = I.getCaseValue();
|
|
|
|
|
|
|
|
// Resulting value at phi nodes for this case value.
|
|
|
|
SwitchCaseResultsTy Results;
|
|
|
|
if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
|
|
|
|
DL))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Only one value per case is permitted
|
|
|
|
if (Results.size() > 1)
|
|
|
|
return false;
|
|
|
|
MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
|
|
|
|
|
|
|
|
// Check the PHI consistency.
|
|
|
|
if (!PHI)
|
|
|
|
PHI = Results[0].first;
|
|
|
|
else if (PHI != Results[0].first)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
// Find the default result value.
|
|
|
|
SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
|
|
|
|
BasicBlock *DefaultDest = SI->getDefaultDest();
|
|
|
|
GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
|
|
|
|
DL);
|
|
|
|
// If the default value is not found abort unless the default destination
|
|
|
|
// is unreachable.
|
|
|
|
DefaultResult =
|
|
|
|
DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
|
|
|
|
if ((!DefaultResult &&
|
|
|
|
!isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// Helper function that checks if it is possible to transform a switch with only
|
|
|
|
// two cases (or two cases + default) that produces a result into a select.
|
2014-10-14 09:58:26 +08:00
|
|
|
// Example:
|
|
|
|
// switch (a) {
|
|
|
|
// case 10: %0 = icmp eq i32 %a, 10
|
|
|
|
// return 10; %1 = select i1 %0, i32 10, i32 4
|
|
|
|
// case 20: ----> %2 = icmp eq i32 %a, 20
|
|
|
|
// return 2; %3 = select i1 %2, i32 2, i32 %1
|
|
|
|
// default:
|
|
|
|
// return 4;
|
|
|
|
// }
|
|
|
|
static Value *
|
|
|
|
ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
|
|
|
|
Constant *DefaultResult, Value *Condition,
|
|
|
|
IRBuilder<> &Builder) {
|
|
|
|
assert(ResultVector.size() == 2 &&
|
|
|
|
"We should have exactly two unique results at this point");
|
|
|
|
// If we are selecting between only two cases transform into a simple
|
|
|
|
// select or a two-way select if default is possible.
|
|
|
|
if (ResultVector[0].second.size() == 1 &&
|
|
|
|
ResultVector[1].second.size() == 1) {
|
|
|
|
ConstantInt *const FirstCase = ResultVector[0].second[0];
|
|
|
|
ConstantInt *const SecondCase = ResultVector[1].second[0];
|
|
|
|
|
|
|
|
bool DefaultCanTrigger = DefaultResult;
|
|
|
|
Value *SelectValue = ResultVector[1].first;
|
|
|
|
if (DefaultCanTrigger) {
|
|
|
|
Value *const ValueCompare =
|
|
|
|
Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
|
|
|
|
SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
|
|
|
|
DefaultResult, "switch.select");
|
|
|
|
}
|
|
|
|
Value *const ValueCompare =
|
|
|
|
Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
|
|
|
|
return Builder.CreateSelect(ValueCompare, ResultVector[0].first, SelectValue,
|
|
|
|
"switch.select");
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
// Helper function to cleanup a switch instruction that has been converted into
|
|
|
|
// a select, fixing up PHI nodes and basic blocks.
|
2014-10-14 09:58:26 +08:00
|
|
|
static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
|
|
|
|
Value *SelectValue,
|
|
|
|
IRBuilder<> &Builder) {
|
|
|
|
BasicBlock *SelectBB = SI->getParent();
|
|
|
|
while (PHI->getBasicBlockIndex(SelectBB) >= 0)
|
|
|
|
PHI->removeIncomingValue(SelectBB);
|
|
|
|
PHI->addIncoming(SelectValue, SelectBB);
|
|
|
|
|
|
|
|
Builder.CreateBr(PHI->getParent());
|
|
|
|
|
|
|
|
// Remove the switch.
|
|
|
|
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
|
|
|
|
BasicBlock *Succ = SI->getSuccessor(i);
|
|
|
|
|
|
|
|
if (Succ == PHI->getParent())
|
|
|
|
continue;
|
|
|
|
Succ->removePredecessor(SelectBB);
|
|
|
|
}
|
|
|
|
SI->eraseFromParent();
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If the switch is only used to initialize one or more
|
2014-10-14 09:58:26 +08:00
|
|
|
/// phi nodes in a common successor block with only two different
|
|
|
|
/// constant values, replace the switch with select.
|
|
|
|
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
|
2015-03-10 10:37:25 +08:00
|
|
|
AssumptionCache *AC, const DataLayout &DL) {
|
2014-10-14 09:58:26 +08:00
|
|
|
Value *const Cond = SI->getCondition();
|
|
|
|
PHINode *PHI = nullptr;
|
|
|
|
BasicBlock *CommonDest = nullptr;
|
|
|
|
Constant *DefaultResult;
|
|
|
|
SwitchCaseResultVectorTy UniqueResults;
|
|
|
|
// Collect all the cases that will deliver the same value from the switch.
|
2015-03-10 10:37:25 +08:00
|
|
|
if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
|
|
|
|
DL))
|
2014-10-14 09:58:26 +08:00
|
|
|
return false;
|
|
|
|
// Selects choose between maximum two values.
|
|
|
|
if (UniqueResults.size() != 2)
|
|
|
|
return false;
|
|
|
|
assert(PHI != nullptr && "PHI for value select not found");
|
|
|
|
|
|
|
|
Builder.SetInsertPoint(SI);
|
|
|
|
Value *SelectValue = ConvertTwoCaseSwitch(
|
|
|
|
UniqueResults,
|
|
|
|
DefaultResult, Cond, Builder);
|
|
|
|
if (SelectValue) {
|
|
|
|
RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// The switch couldn't be converted into a select.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-09-26 17:34:53 +08:00
|
|
|
namespace {
|
2015-06-25 04:40:57 +08:00
|
|
|
/// This class represents a lookup table that can be used to replace a switch.
|
2012-09-26 17:34:53 +08:00
|
|
|
class SwitchLookupTable {
|
|
|
|
public:
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Create a lookup table to use as a switch replacement with the contents
|
|
|
|
/// of Values, using DefaultValue to fill any holes in the table.
|
2015-03-10 10:37:25 +08:00
|
|
|
SwitchLookupTable(
|
|
|
|
Module &M, uint64_t TableSize, ConstantInt *Offset,
|
|
|
|
const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
|
|
|
|
Constant *DefaultValue, const DataLayout &DL);
|
2012-09-26 17:34:53 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Build instructions with Builder to retrieve the value at
|
2012-09-26 17:34:53 +08:00
|
|
|
/// the position given by Index in the lookup table.
|
2014-07-25 05:13:20 +08:00
|
|
|
Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
|
2012-09-26 17:34:53 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Return true if a table with TableSize elements of
|
2012-09-26 17:44:49 +08:00
|
|
|
/// type ElementType would fit in a target-legal register.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
|
2012-09-26 17:44:49 +08:00
|
|
|
const Type *ElementType);
|
|
|
|
|
2012-09-26 17:34:53 +08:00
|
|
|
private:
|
|
|
|
// Depending on the contents of the table, it can be represented in
|
|
|
|
// different ways.
|
|
|
|
enum {
|
|
|
|
// For tables where each element contains the same value, we just have to
|
|
|
|
// store that single value and return it for each lookup.
|
|
|
|
SingleValueKind,
|
|
|
|
|
2014-11-17 17:13:57 +08:00
|
|
|
// For tables where there is a linear relationship between table index
|
|
|
|
// and values. We calculate the result with a simple multiplication
|
|
|
|
// and addition instead of a table lookup.
|
|
|
|
LinearMapKind,
|
|
|
|
|
2012-09-26 17:44:49 +08:00
|
|
|
// For small tables with integer elements, we can pack them into a bitmap
|
|
|
|
// that fits into a target-legal register. Values are retrieved by
|
|
|
|
// shift and mask operations.
|
|
|
|
BitMapKind,
|
|
|
|
|
2012-09-26 17:34:53 +08:00
|
|
|
// The table is stored as an array of values. Values are retrieved by load
|
|
|
|
// instructions from the table.
|
|
|
|
ArrayKind
|
|
|
|
} Kind;
|
|
|
|
|
|
|
|
// For SingleValueKind, this is the single value.
|
|
|
|
Constant *SingleValue;
|
|
|
|
|
2012-09-26 17:44:49 +08:00
|
|
|
// For BitMapKind, this is the bitmap.
|
|
|
|
ConstantInt *BitMap;
|
|
|
|
IntegerType *BitMapElementTy;
|
|
|
|
|
2014-11-17 17:13:57 +08:00
|
|
|
// For LinearMapKind, these are the constants used to derive the value.
|
|
|
|
ConstantInt *LinearOffset;
|
|
|
|
ConstantInt *LinearMultiplier;
|
|
|
|
|
2012-09-26 17:34:53 +08:00
|
|
|
// For ArrayKind, this is the array.
|
|
|
|
GlobalVariable *Array;
|
|
|
|
};
|
2015-06-23 17:49:53 +08:00
|
|
|
}
|
2012-09-26 17:34:53 +08:00
|
|
|
|
2015-03-10 10:37:25 +08:00
|
|
|
/// Materialise the table contents and choose a representation.
///
/// The representations are tried in this order: single value, linear map,
/// register bitmap, and finally a private constant global array.
SwitchLookupTable::SwitchLookupTable(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL)
    : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
      LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // The first result is the candidate for a single-value table; every other
  // entry (and the default, if used) is compared against it.
  Constant *const FirstResult = Values.begin()->second;
  Type *ValueType = FirstResult->getType();
  bool AllEntriesEqual = true;

  // Place each case result at (case value - Offset).
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &CaseAndResult : Values) {
    Constant *CaseRes = CaseAndResult.second;
    assert(CaseRes->getType() == ValueType);

    const uint64_t Slot =
        (CaseAndResult.first->getValue() - Offset->getValue())
            .getLimitedValue();
    TableContents[Slot] = CaseRes;

    if (CaseRes != FirstResult)
      AllEntriesEqual = false;
  }

  // Slots no case wrote to are still null; give them the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (Constant *&Entry : TableContents)
      if (!Entry)
        Entry = DefaultValue;

    if (DefaultValue != FirstResult)
      AllEntriesEqual = false;
  }

  // Uniform table: remember just the one value.
  if (AllEntriesEqual) {
    SingleValue = FirstResult;
    Kind = SingleValueKind;
    return;
  }

  // Integer table: see whether consecutive entries differ by a constant
  // stride, in which case the value is a linear function of the index.
  if (isa<IntegerType>(ValueType)) {
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    bool IsLinear = true;
    APInt Previous;
    APInt Stride;
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *Entry = dyn_cast<ConstantInt>(TableContents[I]);
      if (!Entry) {
        // An undef entry. Supporting it is possible, but undefs in lookup
        // tables are rare enough that the extra complexity is not warranted.
        IsLinear = false;
        break;
      }
      APInt Current = Entry->getValue();
      if (I == 1) {
        // The first pair of entries defines the stride.
        Stride = Current - Previous;
      } else if (I > 1 && (Current - Previous) != Stride) {
        IsLinear = false;
        break;
      }
      Previous = Current;
    }
    if (IsLinear) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), Stride);
      Kind = LinearMapKind;
      ++NumLinearMaps;
      return;
    }
  }

  // Integer table small enough for a register: pack it into one integer,
  // with entry 0 in the least significant bits.
  if (WouldFitInRegister(DL, TableSize, ValueType)) {
    IntegerType *ElemTy = cast<IntegerType>(ValueType);
    const unsigned ElemBits = ElemTy->getBitWidth();
    APInt Packed(TableSize * ElemBits, 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      Packed <<= ElemBits;
      Constant *Elem = TableContents[I - 1];
      // Undef entries contribute zero bits.
      if (isa<UndefValue>(Elem))
        continue;
      ConstantInt *Val = cast<ConstantInt>(Elem);
      Packed |= Val->getValue().zext(Packed.getBitWidth());
    }
    BitMap = ConstantInt::get(M.getContext(), Packed);
    BitMapElementTy = ElemTy;
    Kind = BitMapKind;
    ++NumBitMaps;
    return;
  }

  // Fallback: emit the table as a private constant global array.
  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);

  Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
                             GlobalVariable::PrivateLinkage,
                             Initializer,
                             "switch.table");
  Array->setUnnamedAddr(true);
  Kind = ArrayKind;
}
|
|
|
|
|
2014-07-25 05:13:20 +08:00
|
|
|
/// Emit the code that fetches the table entry at \p Index.
///
/// What is emitted depends on the representation chosen by the constructor:
/// the stored constant, a multiply/add, a shift/truncate on the packed
/// bitmap, or a GEP and load from the global array.
///
/// \param Index   The table index; must be of integer type for the array
///                representation.
/// \param Builder Builder used to emit the instructions at its current
///                insertion point.
/// \returns the value holding the looked-up result.
Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
  switch (Kind) {
  case SingleValueKind:
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value:
    //   Result = Index * LinearMultiplier + LinearOffset
    // The multiply and add are skipped when they would be no-ops.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width.
    ShiftAmt = Builder.CreateMul(ShiftAmt,
                   ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
                                 "switch.shiftamt");

    // Shift down.
    Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
                                            "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy,
                               "switch.masked");
  }
  case ArrayKind: {
    // Make sure the table index will not overflow when treated as signed.
    IntegerType *IT = cast<IntegerType>(Index->getType());
    uint64_t TableSize = Array->getInitializer()->getType()
                             ->getArrayNumElements();
    // The index needs one extra bit only when the table has more than
    // 2^(width-1) entries. For index types wider than 64 bits that threshold
    // exceeds anything a uint64_t table size can hold, and evaluating
    // 1ULL << (width - 1) would be an out-of-range shift (undefined
    // behaviour), so the comparison is skipped entirely in that case.
    const unsigned IndexBits = IT->getBitWidth();
    if (IndexBits <= 64 && TableSize > (1ULL << (IndexBits - 1)))
      Index = Builder.CreateZExt(Index,
                                 IntegerType::get(IT->getContext(),
                                                  IndexBits + 1),
                                 "switch.tableidx.zext");

    Value *GEPIndices[] = { Builder.getInt32(0), Index };
    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                           GEPIndices, "switch.gep");
    return Builder.CreateLoad(GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown lookup table kind!");
}
|
|
|
|
|
2015-03-10 10:37:25 +08:00
|
|
|
bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
|
2012-09-26 17:44:49 +08:00
|
|
|
uint64_t TableSize,
|
|
|
|
const Type *ElementType) {
|
|
|
|
const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
|
|
|
|
if (!IT)
|
|
|
|
return false;
|
|
|
|
// FIXME: If the type is wider than it needs to be, e.g. i8 but all values
|
|
|
|
// are <= 15, we could try to narrow the type.
|
2012-09-28 02:29:58 +08:00
|
|
|
|
|
|
|
// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
|
|
|
|
if (TableSize >= UINT_MAX/IT->getBitWidth())
|
|
|
|
return false;
|
2015-03-10 10:37:25 +08:00
|
|
|
return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
|
2012-09-26 17:44:49 +08:00
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// Determine whether a lookup table should be built for this switch, based on
|
|
|
|
/// the number of cases, size of the table, and the types of the results.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool
|
|
|
|
ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
|
|
|
|
const TargetTransformInfo &TTI, const DataLayout &DL,
|
|
|
|
const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
|
2012-09-26 19:07:37 +08:00
|
|
|
if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
|
|
|
|
return false; // TableSize overflowed, or mul below might overflow.
|
2012-09-26 17:34:53 +08:00
|
|
|
|
2012-11-30 17:26:25 +08:00
|
|
|
bool AllTablesFitInRegister = true;
|
2012-11-30 10:02:42 +08:00
|
|
|
bool HasIllegalType = false;
|
2014-11-18 10:37:11 +08:00
|
|
|
for (const auto &I : ResultTypes) {
|
|
|
|
Type *Ty = I.second;
|
2012-11-30 17:34:29 +08:00
|
|
|
|
|
|
|
// Saturate this flag to true.
|
2013-01-07 11:53:25 +08:00
|
|
|
HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
|
2012-11-30 17:34:29 +08:00
|
|
|
|
|
|
|
// Saturate this flag to false.
|
|
|
|
AllTablesFitInRegister = AllTablesFitInRegister &&
|
2014-02-21 08:06:31 +08:00
|
|
|
SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
|
2012-11-30 17:34:29 +08:00
|
|
|
|
|
|
|
// If both flags saturate, we're done. NOTE: This *only* works with
|
|
|
|
// saturating flags, and all flags have to saturate first due to the
|
|
|
|
// non-deterministic behavior of iterating over a dense map.
|
|
|
|
if (HasIllegalType && !AllTablesFitInRegister)
|
2012-11-30 10:02:42 +08:00
|
|
|
break;
|
2012-09-26 17:44:49 +08:00
|
|
|
}
|
2012-11-30 10:02:42 +08:00
|
|
|
|
2012-11-30 17:26:25 +08:00
|
|
|
// If each table would fit in a register, we should build it anyway.
|
|
|
|
if (AllTablesFitInRegister)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// Don't build a table that doesn't fit in-register if it has illegal types.
|
|
|
|
if (HasIllegalType)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// The table density should be at least 40%. This is the same criterion as for
|
|
|
|
// jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
|
|
|
|
// FIXME: Find the best cut-off.
|
|
|
|
return SI->getNumCases() * 10 >= TableSize * 4;
|
2012-09-06 17:43:28 +08:00
|
|
|
}
|
|
|
|
|
2014-11-27 23:13:14 +08:00
|
|
|
/// Try to reuse the switch table index compare. Following pattern:
|
|
|
|
/// \code
|
|
|
|
/// if (idx < tablesize)
|
|
|
|
/// r = table[idx]; // table does not contain default_value
|
|
|
|
/// else
|
|
|
|
/// r = default_value;
|
|
|
|
/// if (r != default_value)
|
|
|
|
/// ...
|
|
|
|
/// \endcode
|
|
|
|
/// Is optimized to:
|
|
|
|
/// \code
|
|
|
|
/// cond = idx < tablesize;
|
|
|
|
/// if (cond)
|
|
|
|
/// r = table[idx];
|
|
|
|
/// else
|
|
|
|
/// r = default_value;
|
|
|
|
/// if (cond)
|
|
|
|
/// ...
|
|
|
|
/// \endcode
|
|
|
|
/// Jump threading will then eliminate the second if(cond).
|
|
|
|
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
|
|
|
|
BranchInst *RangeCheckBranch, Constant *DefaultValue,
|
|
|
|
const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values) {
|
|
|
|
|
|
|
|
ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
|
|
|
|
if (!CmpInst)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// We require that the compare is in the same block as the phi so that jump
|
|
|
|
// threading can do its work afterwards.
|
|
|
|
if (CmpInst->getParent() != PhiBlock)
|
|
|
|
return;
|
|
|
|
|
|
|
|
Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
|
|
|
|
if (!CmpOp1)
|
|
|
|
return;
|
|
|
|
|
|
|
|
Value *RangeCmp = RangeCheckBranch->getCondition();
|
|
|
|
Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
|
|
|
|
Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
|
|
|
|
|
|
|
|
// Check if the compare with the default value is constant true or false.
|
|
|
|
Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
|
|
|
|
DefaultValue, CmpOp1, true);
|
|
|
|
if (DefaultConst != TrueConst && DefaultConst != FalseConst)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Check if the compare with the case values is distinct from the default
|
|
|
|
// compare result.
|
|
|
|
for (auto ValuePair : Values) {
|
|
|
|
Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
|
|
|
|
ValuePair.second, CmpOp1, true);
|
|
|
|
if (!CaseConst || CaseConst == DefaultConst)
|
|
|
|
return;
|
|
|
|
assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
|
|
|
|
"Expect true or false as compare result.");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the branch instruction dominates the phi node. It's a simple
|
|
|
|
// dominance check, but sufficient for our needs.
|
|
|
|
// Although this check is invariant in the calling loops, it's better to do it
|
|
|
|
// at this late stage. Practically we do it at most once for a switch.
|
|
|
|
BasicBlock *BranchBlock = RangeCheckBranch->getParent();
|
|
|
|
for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) {
|
|
|
|
BasicBlock *Pred = *PI;
|
|
|
|
if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DefaultConst == FalseConst) {
|
|
|
|
// The compare yields the same result. We can replace it.
|
|
|
|
CmpInst->replaceAllUsesWith(RangeCmp);
|
|
|
|
++NumTableCmpReuses;
|
|
|
|
} else {
|
|
|
|
// The compare yields the same result, just inverted. We can replace it.
|
|
|
|
Value *InvertedTableCmp = BinaryOperator::CreateXor(RangeCmp,
|
|
|
|
ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
|
|
|
|
RangeCheckBranch);
|
|
|
|
CmpInst->replaceAllUsesWith(InvertedTableCmp);
|
|
|
|
++NumTableCmpReuses;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// If the switch is only used to initialize one or more phi nodes in a common
|
|
|
|
/// successor block with different constant values, replace the switch with
|
|
|
|
/// lookup tables.
|
2015-03-10 10:37:25 +08:00
|
|
|
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
|
|
|
|
const DataLayout &DL,
|
|
|
|
const TargetTransformInfo &TTI) {
|
2012-09-06 17:43:28 +08:00
|
|
|
assert(SI->getNumCases() > 1 && "Degenerate switch?");
|
2012-10-30 19:23:25 +08:00
|
|
|
|
2012-11-08 05:35:12 +08:00
|
|
|
// Only build lookup table when we have a target that supports it.
|
2013-01-07 11:53:25 +08:00
|
|
|
if (!TTI.shouldBuildLookupTables())
|
2012-10-30 19:23:25 +08:00
|
|
|
return false;
|
|
|
|
|
2012-09-06 17:43:28 +08:00
|
|
|
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
|
|
|
|
// split off a dense part and build a lookup table for that.
|
|
|
|
|
|
|
|
// FIXME: This creates arrays of GEPs to constant strings, which means each
|
|
|
|
// GEP needs a runtime relocation in PIC code. We should just build one big
|
|
|
|
// string and lookup indices into that.
|
|
|
|
|
2014-01-15 13:00:27 +08:00
|
|
|
// Ignore switches with less than three cases. Lookup tables will not make them
|
|
|
|
// faster, so we don't analyze them.
|
|
|
|
if (SI->getNumCases() < 3)
|
2012-09-06 17:43:28 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Figure out the corresponding result for each case value and phi node in the
|
2015-06-19 09:53:21 +08:00
|
|
|
// common destination, as well as the min and max case values.
|
2012-09-06 17:43:28 +08:00
|
|
|
assert(SI->case_begin() != SI->case_end());
|
|
|
|
SwitchInst::CaseIt CI = SI->case_begin();
|
|
|
|
ConstantInt *MinCaseVal = CI.getCaseValue();
|
|
|
|
ConstantInt *MaxCaseVal = CI.getCaseValue();
|
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
BasicBlock *CommonDest = nullptr;
|
2012-09-10 15:44:22 +08:00
|
|
|
typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
|
2012-09-06 17:43:28 +08:00
|
|
|
SmallDenseMap<PHINode*, ResultListTy> ResultLists;
|
|
|
|
SmallDenseMap<PHINode*, Constant*> DefaultResults;
|
|
|
|
SmallDenseMap<PHINode*, Type*> ResultTypes;
|
|
|
|
SmallVector<PHINode*, 4> PHIs;
|
|
|
|
|
|
|
|
for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
|
|
|
|
ConstantInt *CaseVal = CI.getCaseValue();
|
|
|
|
if (CaseVal->getValue().slt(MinCaseVal->getValue()))
|
|
|
|
MinCaseVal = CaseVal;
|
|
|
|
if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
|
|
|
|
MaxCaseVal = CaseVal;
|
|
|
|
|
|
|
|
// Resulting value at phi nodes for this case value.
|
|
|
|
typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
|
|
|
|
ResultsTy Results;
|
2012-10-31 21:42:45 +08:00
|
|
|
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
|
2014-02-21 08:06:31 +08:00
|
|
|
Results, DL))
|
2012-09-06 17:43:28 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Append the result from this case to the list for each phi.
|
2014-11-18 10:37:11 +08:00
|
|
|
for (const auto &I : Results) {
|
|
|
|
PHINode *PHI = I.first;
|
|
|
|
Constant *Value = I.second;
|
|
|
|
if (!ResultLists.count(PHI))
|
|
|
|
PHIs.push_back(PHI);
|
|
|
|
ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
|
2012-09-06 17:43:28 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-12 08:44:41 +08:00
|
|
|
// Keep track of the result types.
|
2014-11-18 10:37:11 +08:00
|
|
|
for (PHINode *PHI : PHIs) {
|
2014-01-12 08:44:41 +08:00
|
|
|
ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t NumResults = ResultLists[PHIs[0]].size();
|
|
|
|
APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
|
|
|
|
uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
|
|
|
|
bool TableHasHoles = (NumResults < TableSize);
|
|
|
|
|
2014-03-13 02:35:40 +08:00
|
|
|
// If the table has holes, we need a constant result for the default case
|
|
|
|
// or a bitmask that fits in a register.
|
2012-09-10 15:44:22 +08:00
|
|
|
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
|
2014-11-27 23:13:14 +08:00
|
|
|
bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
|
2015-03-10 10:37:25 +08:00
|
|
|
&CommonDest, DefaultResultsList, DL);
|
2014-11-18 10:37:11 +08:00
|
|
|
|
2014-03-13 02:35:40 +08:00
|
|
|
bool NeedMask = (TableHasHoles && !HasDefaultResults);
|
|
|
|
if (NeedMask) {
|
|
|
|
// As an extra penalty for the validity test we require more cases.
|
|
|
|
if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
|
|
|
|
return false;
|
2015-03-10 10:37:25 +08:00
|
|
|
if (!DL.fitsInLegalInteger(TableSize))
|
2014-03-13 02:35:40 +08:00
|
|
|
return false;
|
|
|
|
}
|
2014-01-12 08:44:41 +08:00
|
|
|
|
2014-11-18 10:37:11 +08:00
|
|
|
for (const auto &I : DefaultResultsList) {
|
|
|
|
PHINode *PHI = I.first;
|
|
|
|
Constant *Result = I.second;
|
2012-09-10 15:44:22 +08:00
|
|
|
DefaultResults[PHI] = Result;
|
2012-09-06 17:43:28 +08:00
|
|
|
}
|
|
|
|
|
2014-02-21 08:06:31 +08:00
|
|
|
if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
|
2012-09-06 17:43:28 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Create the BB that does the lookups.
|
2012-09-26 17:34:53 +08:00
|
|
|
Module &Mod = *CommonDest->getParent()->getParent();
|
2012-09-06 17:43:28 +08:00
|
|
|
BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
|
|
|
|
"switch.lookup",
|
|
|
|
CommonDest->getParent(),
|
|
|
|
CommonDest);
|
|
|
|
|
2013-10-20 15:04:37 +08:00
|
|
|
// Compute the table index value.
|
2012-09-06 17:43:28 +08:00
|
|
|
Builder.SetInsertPoint(SI);
|
|
|
|
Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
|
|
|
|
"switch.tableidx");
|
2013-10-20 15:04:37 +08:00
|
|
|
|
|
|
|
// Compute the maximum table size representable by the integer type we are
|
|
|
|
// switching upon.
|
2013-10-21 13:20:11 +08:00
|
|
|
unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
|
2014-01-12 08:44:41 +08:00
|
|
|
uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
|
2013-10-20 15:04:37 +08:00
|
|
|
assert(MaxTableSize >= TableSize &&
|
|
|
|
"It is impossible for a switch to have more entries than the max "
|
|
|
|
"representable value of its input integer type's size.");
|
|
|
|
|
2015-01-27 03:52:34 +08:00
|
|
|
// If the default destination is unreachable, or if the lookup table covers
|
|
|
|
// all values of the conditional variable, branch directly to the lookup table
|
|
|
|
// BB. Otherwise, check that the condition is within the case range.
|
|
|
|
const bool DefaultIsReachable =
|
|
|
|
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
|
|
|
|
const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
|
2014-11-27 23:13:14 +08:00
|
|
|
BranchInst *RangeCheckBranch = nullptr;
|
|
|
|
|
2015-01-27 03:52:34 +08:00
|
|
|
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
|
2013-10-20 15:04:37 +08:00
|
|
|
Builder.CreateBr(LookupBB);
|
2015-04-25 04:57:56 +08:00
|
|
|
// Note: We call removeProdecessor later since we need to be able to get the
|
|
|
|
// PHI value for the default case in case we're using a bit mask.
|
2013-10-20 15:04:37 +08:00
|
|
|
} else {
|
|
|
|
Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
|
2014-07-24 07:13:23 +08:00
|
|
|
MinCaseVal->getType(), TableSize));
|
2014-11-27 23:13:14 +08:00
|
|
|
RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
|
2013-10-20 15:04:37 +08:00
|
|
|
}
|
2012-09-06 17:43:28 +08:00
|
|
|
|
|
|
|
// Populate the BB that does the lookups.
|
|
|
|
Builder.SetInsertPoint(LookupBB);
|
2014-03-13 02:35:40 +08:00
|
|
|
|
|
|
|
if (NeedMask) {
|
|
|
|
// Before doing the lookup we do the hole check.
|
|
|
|
// The LookupBB is therefore re-purposed to do the hole check
|
|
|
|
// and we create a new LookupBB.
|
|
|
|
BasicBlock *MaskBB = LookupBB;
|
|
|
|
MaskBB->setName("switch.hole_check");
|
|
|
|
LookupBB = BasicBlock::Create(Mod.getContext(),
|
|
|
|
"switch.lookup",
|
|
|
|
CommonDest->getParent(),
|
|
|
|
CommonDest);
|
|
|
|
|
2014-11-18 03:39:56 +08:00
|
|
|
// Make the mask's bitwidth at least 8bit and a power-of-2 to avoid
|
|
|
|
// unnecessary illegal types.
|
|
|
|
uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
|
|
|
|
APInt MaskInt(TableSizePowOf2, 0);
|
|
|
|
APInt One(TableSizePowOf2, 1);
|
2014-03-13 02:35:40 +08:00
|
|
|
// Build bitmask; fill in a 1 bit for every case.
|
|
|
|
const ResultListTy &ResultList = ResultLists[PHIs[0]];
|
|
|
|
for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
|
|
|
|
uint64_t Idx = (ResultList[I].first->getValue() -
|
|
|
|
MinCaseVal->getValue()).getLimitedValue();
|
|
|
|
MaskInt |= One << Idx;
|
|
|
|
}
|
|
|
|
ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
|
|
|
|
|
|
|
|
// Get the TableIndex'th bit of the bitmask.
|
|
|
|
// If this bit is 0 (meaning hole) jump to the default destination,
|
|
|
|
// else continue with table lookup.
|
|
|
|
IntegerType *MapTy = TableMask->getType();
|
|
|
|
Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy,
|
|
|
|
"switch.maskindex");
|
|
|
|
Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex,
|
|
|
|
"switch.shifted");
|
|
|
|
Value *LoBit = Builder.CreateTrunc(Shifted,
|
|
|
|
Type::getInt1Ty(Mod.getContext()),
|
|
|
|
"switch.lobit");
|
|
|
|
Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
|
|
|
|
|
|
|
|
Builder.SetInsertPoint(LookupBB);
|
|
|
|
AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
|
|
|
|
}
|
|
|
|
|
2015-04-25 04:57:56 +08:00
|
|
|
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
|
|
|
|
// We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later,
|
|
|
|
// do not delete PHINodes here.
|
|
|
|
SI->getDefaultDest()->removePredecessor(SI->getParent(),
|
|
|
|
/*DontDeleteUselessPHIs=*/true);
|
|
|
|
}
|
|
|
|
|
2012-09-06 17:43:28 +08:00
|
|
|
bool ReturnedEarly = false;
|
2012-09-26 17:34:53 +08:00
|
|
|
for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
|
|
|
|
PHINode *PHI = PHIs[I];
|
2014-11-27 23:13:14 +08:00
|
|
|
const ResultListTy &ResultList = ResultLists[PHI];
|
2012-09-26 17:34:53 +08:00
|
|
|
|
2014-03-13 02:35:40 +08:00
|
|
|
// If using a bitmask, use any value to fill the lookup table holes.
|
|
|
|
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
|
2014-11-27 23:13:14 +08:00
|
|
|
SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL);
|
2012-09-06 17:43:28 +08:00
|
|
|
|
2014-07-25 05:13:20 +08:00
|
|
|
Value *Result = Table.BuildLookup(TableIndex, Builder);
|
2012-09-06 17:43:28 +08:00
|
|
|
|
2012-09-19 22:24:21 +08:00
|
|
|
// If the result is used to return immediately from the function, we want to
|
|
|
|
// do that right here.
|
2014-03-09 11:16:01 +08:00
|
|
|
if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) &&
|
|
|
|
PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) {
|
2012-09-19 22:24:21 +08:00
|
|
|
Builder.CreateRet(Result);
|
|
|
|
ReturnedEarly = true;
|
|
|
|
break;
|
2012-09-06 17:43:28 +08:00
|
|
|
}
|
|
|
|
|
2014-11-27 23:13:14 +08:00
|
|
|
// Do a small peephole optimization: re-use the switch table compare if
|
|
|
|
// possible.
|
|
|
|
if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
|
|
|
|
BasicBlock *PhiBlock = PHI->getParent();
|
|
|
|
// Search for compare instructions which use the phi.
|
|
|
|
for (auto *User : PHI->users()) {
|
|
|
|
reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-19 22:24:21 +08:00
|
|
|
PHI->addIncoming(Result, LookupBB);
|
2012-09-06 17:43:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!ReturnedEarly)
|
|
|
|
Builder.CreateBr(CommonDest);
|
|
|
|
|
|
|
|
// Remove the switch.
|
2013-10-21 13:20:11 +08:00
|
|
|
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
|
2012-09-06 17:43:28 +08:00
|
|
|
BasicBlock *Succ = SI->getSuccessor(i);
|
2013-10-20 15:04:37 +08:00
|
|
|
|
2013-10-21 13:20:11 +08:00
|
|
|
if (Succ == SI->getDefaultDest())
|
2013-10-20 15:04:37 +08:00
|
|
|
continue;
|
2012-09-06 17:43:28 +08:00
|
|
|
Succ->removePredecessor(SI->getParent());
|
|
|
|
}
|
|
|
|
SI->eraseFromParent();
|
|
|
|
|
|
|
|
++NumLookupTables;
|
2014-03-13 02:35:40 +08:00
|
|
|
if (NeedMask)
|
|
|
|
++NumLookupTablesHoles;
|
2012-09-06 17:43:28 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2011-05-19 04:35:38 +08:00
|
|
|
/// Run the switch simplifications on \p SI, one after another.
///
/// As soon as one transformation succeeds, SimplifyCFG is re-run on the
/// containing block and true is returned. Returns false if nothing applied.
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  // Shared tail of every successful transformation: simplify the block again
  // and report a change regardless of what that second pass returns.
  auto ResimplifyAndReportChange = [&]() -> bool {
    SimplifyCFG(BB, TTI, BonusInstThreshold, AC);
    return true;
  };

  if (isValueEqualityComparison(SI)) {
    // With a single predecessor that branches on the same value, that
    // predecessor may fully determine which way this switch goes.
    BasicBlock *OnlyPred = BB->getSinglePredecessor();
    if (OnlyPred &&
        SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
      return ResimplifyAndReportChange();

    SelectInst *Select = dyn_cast<SelectInst>(SI->getCondition());
    if (Select && SimplifySwitchOnSelect(SI, Select))
      return ResimplifyAndReportChange();

    // If the switch is the only instruction in the block (debug intrinsics
    // aside), try folding the block away into its predecessors.
    BasicBlock::iterator FirstReal = BB->begin();
    while (isa<DbgInfoIntrinsic>(FirstReal))
      ++FirstReal;
    if (SI == &*FirstReal &&
        FoldValueComparisonIntoPredecessors(SI, Builder))
      return ResimplifyAndReportChange();
  }

  // Try to transform the switch into an icmp and a branch.
  if (TurnSwitchRangeIntoICmp(SI, Builder))
    return ResimplifyAndReportChange();

  // Remove unreachable cases.
  if (EliminateDeadSwitchCases(SI, AC, DL))
    return ResimplifyAndReportChange();

  if (SwitchToSelect(SI, Builder, AC, DL))
    return ResimplifyAndReportChange();

  if (ForwardSwitchConditionToPHI(SI))
    return ResimplifyAndReportChange();

  if (SwitchToLookupTable(SI, Builder, DL, TTI))
    return ResimplifyAndReportChange();

  return false;
}
|
|
|
|
|
|
|
|
/// Simplify an indirectbr terminator.
///
/// Drops destinations that are duplicates or whose address is never taken,
/// then turns the instruction into 'unreachable' (no destinations left) or a
/// direct branch (exactly one left), or tries to fold an indirectbr on a
/// select.  Returns true if anything was changed.
bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;

  // Eliminate redundant destinations.  The index only advances when the
  // current destination is kept, because removing a destination puts a
  // not-yet-visited one (if any) at the same index.
  SmallPtrSet<Value *, 8> Succs;
  unsigned Idx = 0;
  unsigned NumDests = IBI->getNumDestinations();
  while (Idx != NumDests) {
    BasicBlock *Dest = IBI->getDestination(Idx);
    if (Dest->hasAddressTaken() && Succs.insert(Dest).second) {
      ++Idx;
      continue;
    }

    Dest->removePredecessor(BB);
    IBI->removeDestination(Idx);
    --NumDests;
    Changed = true;
  }

  const unsigned Remaining = IBI->getNumDestinations();

  if (Remaining == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI);
    EraseTerminatorInstAndDCECond(IBI);
    return true;
  }

  if (Remaining == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    BranchInst::Create(IBI->getDestination(0), IBI);
    EraseTerminatorInstAndDCECond(IBI);
    return true;
  }

  SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress());
  if (SI && SimplifyIndirectBrOnSelect(IBI, SI)) {
    // The fold already changed the IR; run another round on the block and
    // report a change regardless of what that round finds.
    SimplifyCFG(BB, TTI, BonusInstThreshold, AC);
    return true;
  }

  return Changed;
}
|
2004-10-18 12:07:22 +08:00
|
|
|
|
2015-03-25 06:28:45 +08:00
|
|
|
/// Given an block with only a single landing pad and a unconditional branch
|
|
|
|
/// try to find another basic block which this one can be merged with. This
|
|
|
|
/// handles cases where we have multiple invokes with unique landing pads, but
|
|
|
|
/// a shared handler.
|
|
|
|
///
|
|
|
|
/// We specifically choose to not worry about merging non-empty blocks
|
|
|
|
/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
|
|
|
|
/// practice, the optimizer produces empty landing pad blocks quite frequently
|
|
|
|
/// when dealing with exception dense code. (see: instcombine, gvn, if-else
|
|
|
|
/// sinking in this file)
|
|
|
|
///
|
|
|
|
/// This is primarily a code size optimization. We need to avoid performing
|
|
|
|
/// any transform which might inhibit optimization (such as our ability to
|
|
|
|
/// specialize a particular handler via tail commoning). We do this by not
|
|
|
|
/// merging any blocks which require us to introduce a phi. Since the same
|
|
|
|
/// values are flowing through both blocks, we don't loose any ability to
|
|
|
|
/// specialize. If anything, we make such specialization more likely.
|
|
|
|
///
|
|
|
|
/// TODO - This transformation could remove entries from a phi in the target
|
|
|
|
/// block when the inputs in the phi are the same for the two blocks being
|
|
|
|
/// merged. In some cases, this could result in removal of the PHI entirely.
|
|
|
|
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
|
|
|
|
BasicBlock *BB) {
|
|
|
|
auto Succ = BB->getUniqueSuccessor();
|
|
|
|
assert(Succ);
|
|
|
|
// If there's a phi in the successor block, we'd likely have to introduce
|
|
|
|
// a phi into the merged landing pad block.
|
|
|
|
if (isa<PHINode>(*Succ->begin()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
for (BasicBlock *OtherPred : predecessors(Succ)) {
|
|
|
|
if (BB == OtherPred)
|
|
|
|
continue;
|
|
|
|
BasicBlock::iterator I = OtherPred->begin();
|
|
|
|
LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
|
|
|
|
if (!LPad2 || !LPad2->isIdenticalTo(LPad))
|
|
|
|
continue;
|
|
|
|
for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
|
|
|
|
BranchInst *BI2 = dyn_cast<BranchInst>(I);
|
|
|
|
if (!BI2 || !BI2->isIdenticalTo(BI))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// We've found an identical block. Update our predeccessors to take that
|
|
|
|
// path instead and make ourselves dead.
|
|
|
|
SmallSet<BasicBlock *, 16> Preds;
|
|
|
|
Preds.insert(pred_begin(BB), pred_end(BB));
|
|
|
|
for (BasicBlock *Pred : Preds) {
|
|
|
|
InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
|
|
|
|
assert(II->getNormalDest() != BB &&
|
|
|
|
II->getUnwindDest() == BB && "unexpected successor");
|
|
|
|
II->setUnwindDest(OtherPred);
|
|
|
|
}
|
|
|
|
|
|
|
|
// The debug info in OtherPred doesn't cover the merged control flow that
|
|
|
|
// used to go through BB. We need to delete it or update it.
|
|
|
|
for (auto I = OtherPred->begin(), E = OtherPred->end();
|
|
|
|
I != E;) {
|
|
|
|
Instruction &Inst = *I; I++;
|
|
|
|
if (isa<DbgInfoIntrinsic>(Inst))
|
|
|
|
Inst.eraseFromParent();
|
|
|
|
}
|
|
|
|
|
|
|
|
SmallSet<BasicBlock *, 16> Succs;
|
|
|
|
Succs.insert(succ_begin(BB), succ_end(BB));
|
|
|
|
for (BasicBlock *Succ : Succs) {
|
|
|
|
Succ->removePredecessor(BB);
|
|
|
|
}
|
|
|
|
|
|
|
|
IRBuilder<> Builder(BI);
|
|
|
|
Builder.CreateUnreachable();
|
|
|
|
BI->eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-05-19 02:28:48 +08:00
|
|
|
/// Try the simplifications that apply to a block ending in the unconditional
/// branch \p BI.  Returns true if the IR was changed.
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();

  if (SinkCommon && SinkThenElseCodeToEnd(BI))
    return true;

  // Moves the iterator past the current instruction and past any debug
  // intrinsics that directly follow it.
  auto StepPastDbg = [](BasicBlock::iterator &It) {
    do {
      ++It;
    } while (isa<DbgInfoIntrinsic>(It));
  };

  // If the Terminator is the only non-phi instruction, simplify the block.
  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
  if (I->isTerminator()) {
    const bool IsEntryBlock = BB == &BB->getParent()->getEntryBlock();
    if (!IsEntryBlock && TryToSimplifyUncondBranchFromEmptyBlock(BB))
      return true;
  }

  // If the only instruction in the block is a seteq/setne comparison
  // against a constant, try to simplify the block.
  ICmpInst *ICI = dyn_cast<ICmpInst>(I);
  if (ICI && ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
    StepPastDbg(I);
    if (I->isTerminator() &&
        TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI,
                                              BonusInstThreshold, AC))
      return true;
  }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
    StepPastDbg(I);
    if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
      return true;
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and our successor, fold the comparison into the
  // predecessor and use logical operations to update the incoming value
  // for PHI nodes in common successor.
  if (FoldBranchToCommonDest(BI, BonusInstThreshold)) {
    // Run another round on the block; a change is reported either way.
    SimplifyCFG(BB, TTI, BonusInstThreshold, AC);
    return true;
  }
  return false;
}
|
2010-08-14 08:29:42 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
|
2011-05-19 04:35:38 +08:00
|
|
|
bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
|
2010-12-13 14:25:44 +08:00
|
|
|
BasicBlock *BB = BI->getParent();
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// Conditional branch
|
|
|
|
if (isValueEqualityComparison(BI)) {
|
|
|
|
// If we only have one predecessor, and if it is a branch on this value,
|
|
|
|
// see if that predecessor totally determines the outcome of this
|
|
|
|
// switch.
|
|
|
|
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
|
2011-05-19 04:35:38 +08:00
|
|
|
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
|
2015-03-10 10:37:25 +08:00
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// This block must be empty, except for the setcond inst, if it exists.
|
|
|
|
// Ignore dbg intrinsics.
|
|
|
|
BasicBlock::iterator I = BB->begin();
|
|
|
|
// Ignore dbg intrinsics.
|
|
|
|
while (isa<DbgInfoIntrinsic>(I))
|
|
|
|
++I;
|
|
|
|
if (&*I == BI) {
|
2011-05-19 04:53:17 +08:00
|
|
|
if (FoldValueComparisonIntoPredecessors(BI, Builder))
|
2015-03-10 10:37:25 +08:00
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2010-12-13 14:25:44 +08:00
|
|
|
} else if (&*I == cast<Instruction>(BI->getCondition())){
|
|
|
|
++I;
|
|
|
|
// Ignore dbg intrinsics.
|
|
|
|
while (isa<DbgInfoIntrinsic>(I))
|
|
|
|
++I;
|
2011-05-19 04:53:17 +08:00
|
|
|
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
|
2015-03-10 10:37:25 +08:00
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2010-12-13 14:25:44 +08:00
|
|
|
}
|
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
|
2015-03-10 10:37:25 +08:00
|
|
|
if (SimplifyBranchOnICmpChain(BI, Builder, DL))
|
2010-12-13 14:25:44 +08:00
|
|
|
return true;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2012-01-06 07:58:56 +08:00
|
|
|
// If this basic block is ONLY a compare and a branch, and if a predecessor
|
|
|
|
// branches to us and one of our successors, fold the comparison into the
|
|
|
|
// predecessor and use logical operations to pick the right destination.
|
2015-03-10 10:37:25 +08:00
|
|
|
if (FoldBranchToCommonDest(BI, BonusInstThreshold))
|
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// We have a conditional branch to two blocks that are only reachable
|
|
|
|
// from BI. We know that the condbr dominates the two blocks, so see if
|
|
|
|
// there is any identical code in the "then" and "else" blocks. If so, we
|
|
|
|
// can hoist it up to the branching block.
|
2014-04-25 13:29:35 +08:00
|
|
|
if (BI->getSuccessor(0)->getSinglePredecessor()) {
|
|
|
|
if (BI->getSuccessor(1)->getSinglePredecessor()) {
|
2015-03-10 10:37:25 +08:00
|
|
|
if (HoistThenElseCodeToIf(BI, TTI))
|
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2010-12-13 14:25:44 +08:00
|
|
|
} else {
|
|
|
|
// If Successor #1 has multiple preds, we may be able to conditionally
|
2014-07-07 07:10:24 +08:00
|
|
|
// execute Successor #0 if it branches to Successor #1.
|
2010-12-13 14:25:44 +08:00
|
|
|
TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
|
|
|
|
if (Succ0TI->getNumSuccessors() == 1 &&
|
|
|
|
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
|
2015-03-10 10:37:25 +08:00
|
|
|
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
|
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2010-08-14 08:29:42 +08:00
|
|
|
}
|
2014-04-25 13:29:35 +08:00
|
|
|
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
|
2010-12-13 14:25:44 +08:00
|
|
|
// If Successor #0 has multiple preds, we may be able to conditionally
|
2014-07-07 07:10:24 +08:00
|
|
|
// execute Successor #1 if it branches to Successor #0.
|
2010-12-13 14:25:44 +08:00
|
|
|
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
|
|
|
|
if (Succ1TI->getNumSuccessors() == 1 &&
|
|
|
|
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
|
2015-03-10 10:37:25 +08:00
|
|
|
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
|
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2004-02-16 14:35:48 +08:00
|
|
|
}
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// If this is a branch on a phi node in the current block, thread control
|
|
|
|
// through this block if any PHI node entries are constants.
|
|
|
|
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
|
|
|
|
if (PN->getParent() == BI->getParent())
|
2014-02-21 08:06:31 +08:00
|
|
|
if (FoldCondBranchOnPHI(BI, DL))
|
2015-03-10 10:37:25 +08:00
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2012-08-30 05:46:36 +08:00
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
// Scan predecessor blocks for conditional branches.
|
2014-07-22 01:06:51 +08:00
|
|
|
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
|
|
|
|
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
|
2010-12-13 14:25:44 +08:00
|
|
|
if (PBI != BI && PBI->isConditional())
|
|
|
|
if (SimplifyCondBranchToCondBranch(PBI, BI))
|
2015-03-10 10:37:25 +08:00
|
|
|
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
2010-12-13 14:25:44 +08:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-08-26 09:22:29 +08:00
|
|
|
/// Check if passing a value to an instruction will cause undefined behavior.
|
|
|
|
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
|
|
|
|
Constant *C = dyn_cast<Constant>(V);
|
|
|
|
if (!C)
|
|
|
|
return false;
|
|
|
|
|
2012-10-05 00:11:49 +08:00
|
|
|
if (I->use_empty())
|
2011-08-26 09:22:29 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
if (C->isNullValue()) {
|
2012-10-05 00:11:49 +08:00
|
|
|
// Only look at the first use, avoid hurting compile time with long uselists
|
2014-03-09 11:16:01 +08:00
|
|
|
User *Use = *I->user_begin();
|
2011-08-26 09:22:29 +08:00
|
|
|
|
|
|
|
// Now make sure that there are no instructions in between that can alter
|
|
|
|
// control flow (eg. calls)
|
|
|
|
for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i)
|
2011-08-26 10:25:55 +08:00
|
|
|
if (i == I->getParent()->end() || i->mayHaveSideEffects())
|
2011-08-26 09:22:29 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Look through GEPs. A load from a GEP derived from NULL is still undefined
|
|
|
|
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
|
|
|
|
if (GEP->getPointerOperand() == I)
|
|
|
|
return passingValueIsAlwaysUndefined(V, GEP);
|
|
|
|
|
|
|
|
// Look through bitcasts.
|
|
|
|
if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
|
|
|
|
return passingValueIsAlwaysUndefined(V, BC);
|
|
|
|
|
2011-08-26 10:25:55 +08:00
|
|
|
// Load from null is undefined.
|
|
|
|
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
|
2013-03-07 09:03:35 +08:00
|
|
|
if (!LI->isVolatile())
|
|
|
|
return LI->getPointerAddressSpace() == 0;
|
2011-08-26 09:22:29 +08:00
|
|
|
|
2011-08-26 10:25:55 +08:00
|
|
|
// Store to null is undefined.
|
|
|
|
if (StoreInst *SI = dyn_cast<StoreInst>(Use))
|
2013-03-07 09:03:35 +08:00
|
|
|
if (!SI->isVolatile())
|
|
|
|
return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
|
2011-08-26 09:22:29 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// If BB has an incoming value that will always trigger undefined behavior
|
2011-12-27 04:37:40 +08:00
|
|
|
/// (eg. null pointer dereference), remove the branch leading here.
|
2011-08-26 09:22:29 +08:00
|
|
|
static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
|
|
|
|
for (BasicBlock::iterator i = BB->begin();
|
|
|
|
PHINode *PHI = dyn_cast<PHINode>(i); ++i)
|
|
|
|
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
|
|
|
|
if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) {
|
|
|
|
TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator();
|
|
|
|
IRBuilder<> Builder(T);
|
|
|
|
if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
|
|
|
|
BB->removePredecessor(PHI->getIncomingBlock(i));
|
|
|
|
// Turn uncoditional branches into unreachables and remove the dead
|
|
|
|
// destination from conditional branches.
|
|
|
|
if (BI->isUnconditional())
|
|
|
|
Builder.CreateUnreachable();
|
|
|
|
else
|
|
|
|
Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) :
|
|
|
|
BI->getSuccessor(0));
|
|
|
|
BI->eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// TODO: SwitchInst.
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2010-12-13 14:25:44 +08:00
|
|
|
/// Run one round of CFG simplification on \p BB.
///
/// Performs the block-level cleanups first and then dispatches on the kind of
/// terminator.  Returns true if the IR was changed.
bool SimplifyCFGOpt::run(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!");
  assert(BB->getTerminator() && "Degenerate basic block encountered!");

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themself as a predecessor.  These are unreachable.
  const bool NoPredsAndNotEntry =
      pred_empty(BB) && BB != &BB->getParent()->getEntryBlock();
  if (NoPredsAndNotEntry || BB->getSinglePredecessor() == BB) {
    DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB);
    return true;
  }

  // Check to see if we can constant propagate this terminator instruction
  // away...
  if (ConstantFoldTerminator(BB, true))
    Changed = true;

  // Check for and eliminate duplicate PHI nodes in this block.
  if (EliminateDuplicatePHINodes(BB))
    Changed = true;

  // Check for and remove branches that will always cause undefined behavior.
  if (removeUndefIntroducingPredecessor(BB))
    Changed = true;

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB))
    return true;

  IRBuilder<> Builder(BB);

  // If there is a trivial two-entry PHI node in this basic block, and we can
  // eliminate it, do so now.
  PHINode *PN = dyn_cast<PHINode>(BB->begin());
  if (PN && PN->getNumIncomingValues() == 2)
    Changed |= FoldTwoEntryPHINode(PN, TTI, DL);

  // Dispatch on the terminator kind; any successful simplification ends this
  // round immediately.
  TerminatorInst *Term = BB->getTerminator();
  Builder.SetInsertPoint(Term);

  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
    const bool Simplified = BI->isUnconditional()
                                ? SimplifyUncondBranch(BI, Builder)
                                : SimplifyCondBranch(BI, Builder);
    if (Simplified)
      return true;
  } else if (ReturnInst *RI = dyn_cast<ReturnInst>(Term)) {
    if (SimplifyReturn(RI, Builder))
      return true;
  } else if (ResumeInst *RI = dyn_cast<ResumeInst>(Term)) {
    if (SimplifyResume(RI, Builder))
      return true;
  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
    if (SimplifySwitch(SI, Builder))
      return true;
  } else if (UnreachableInst *UI = dyn_cast<UnreachableInst>(Term)) {
    if (SimplifyUnreachable(UI))
      return true;
  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Term)) {
    if (SimplifyIndirectBr(IBI))
      return true;
  }

  return Changed;
}
|
2010-02-06 06:03:18 +08:00
|
|
|
|
2015-06-25 04:40:57 +08:00
|
|
|
/// This function is used to do simplification of a CFG.
|
|
|
|
/// For example, it adjusts branches to branches to eliminate the extra hop,
|
2010-02-06 06:03:18 +08:00
|
|
|
/// eliminates unreachable basic blocks, and does other "peephole" optimization
|
|
|
|
/// of the CFG. It returns true if a modification was made.
|
|
|
|
///
|
2013-01-07 11:53:25 +08:00
|
|
|
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
|
2015-03-10 10:37:25 +08:00
|
|
|
unsigned BonusInstThreshold, AssumptionCache *AC) {
|
|
|
|
return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
|
|
|
|
BonusInstThreshold, AC).run(BB);
|
2010-02-06 06:03:18 +08:00
|
|
|
}
|