2003-05-21 05:01:22 +08:00
|
|
|
//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2005-04-22 07:48:37 +08:00
|
|
|
//
|
2003-10-21 03:43:21 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2001-06-28 07:38:11 +08:00
|
|
|
//
|
2003-05-21 05:01:22 +08:00
|
|
|
// This file implements sparse conditional constant propagation and merging:
|
2001-06-28 07:38:11 +08:00
|
|
|
//
|
|
|
|
// Specifically, this:
|
|
|
|
// * Assumes values are constant unless proven otherwise
|
|
|
|
// * Assumes BasicBlocks are dead unless proven otherwise
|
|
|
|
// * Proves values to be constant, and replaces them with constants
|
2002-08-31 07:39:00 +08:00
|
|
|
// * Proves conditional branches to be unconditional
|
2001-06-28 07:38:11 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2018-03-23 05:41:29 +08:00
|
|
|
#include "llvm/Transforms/Scalar/SCCP.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
|
|
#include "llvm/ADT/DenseSet.h"
|
2019-06-27 05:44:37 +08:00
|
|
|
#include "llvm/ADT/MapVector.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/PointerIntPair.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
|
|
|
#include "llvm/Analysis/ConstantFolding.h"
|
2020-06-25 02:02:35 +08:00
|
|
|
#include "llvm/Analysis/DomTreeUpdater.h"
|
2016-05-06 05:05:36 +08:00
|
|
|
#include "llvm/Analysis/GlobalsModRef.h"
|
2020-04-14 18:15:20 +08:00
|
|
|
#include "llvm/Analysis/InstructionSimplify.h"
|
2015-03-24 03:32:43 +08:00
|
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
Recommit r315288: [SCCP] Propagate integer range info for parameters in IPSCCP.
This version of the patch includes a fix addressing a stage2 LTO buildbot
failure and addressed some additional nits.
Original commit message:
This updates the SCCP solver to make use of the ValueElement lattice for
parameters, which provides integer range information. The range
information is used to remove unneeded icmp instructions.
For the following function, f() can be optimized to ret i32 2 with
this change
source_filename = "sccp.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readnone uwtable
define i32 @main() local_unnamed_addr #0 {
entry:
%call = tail call fastcc i32 @f(i32 1)
%call1 = tail call fastcc i32 @f(i32 47)
%add3 = add nsw i32 %call, %call1
ret i32 %add3
}
; Function Attrs: noinline norecurse nounwind readnone uwtable
define internal fastcc i32 @f(i32 %x) unnamed_addr #1 {
entry:
%c1 = icmp sle i32 %x, 100
%cmp = icmp sgt i32 %x, 300
%. = select i1 %cmp, i32 1, i32 2
ret i32 %.
}
attributes #1 = { noinline }
Reviewers: davide, sanjoy, efriedma, dberlin
Reviewed By: davide, dberlin
Subscribers: mcrosier, gberry, mssimpso, dberlin, llvm-commits
Differential Revision: https://reviews.llvm.org/D36656
llvm-svn: 316891
2017-10-30 18:07:42 +08:00
|
|
|
#include "llvm/Analysis/ValueLattice.h"
|
2017-10-14 01:53:44 +08:00
|
|
|
#include "llvm/Analysis/ValueLatticeUtils.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include "llvm/IR/BasicBlock.h"
|
|
|
|
#include "llvm/IR/Constant.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/GlobalVariable.h"
|
2014-03-06 11:23:41 +08:00
|
|
|
#include "llvm/IR/InstVisitor.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include "llvm/IR/InstrTypes.h"
|
|
|
|
#include "llvm/IR/Instruction.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/Instructions.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
#include "llvm/IR/PassManager.h"
|
|
|
|
#include "llvm/IR/Type.h"
|
|
|
|
#include "llvm/IR/User.h"
|
|
|
|
#include "llvm/IR/Value.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include "llvm/InitializePasses.h"
|
2002-02-27 05:46:54 +08:00
|
|
|
#include "llvm/Pass.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include "llvm/Support/Casting.h"
|
2004-09-02 06:55:40 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2009-07-11 21:10:19 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2009-07-25 08:23:56 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2016-05-06 05:05:36 +08:00
|
|
|
#include "llvm/Transforms/Scalar.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-14 05:15:01 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2018-08-23 19:04:00 +08:00
|
|
|
#include "llvm/Transforms/Utils/PredicateInfo.h"
|
2017-10-21 05:47:29 +08:00
|
|
|
#include <cassert>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
|
|
|
|
2004-01-09 14:02:20 +08:00
|
|
|
using namespace llvm;
|
2003-11-12 06:41:34 +08:00
|
|
|
|
2014-04-22 10:55:47 +08:00
|
|
|
#define DEBUG_TYPE "sccp"
|
|
|
|
|
2006-12-20 05:40:18 +08:00
|
|
|
STATISTIC(NumInstRemoved, "Number of instructions removed");
|
|
|
|
STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
|
2020-06-19 16:27:52 +08:00
|
|
|
STATISTIC(NumInstReplaced,
|
|
|
|
"Number of instructions replaced with (simpler) instruction");
|
2006-12-20 05:40:18 +08:00
|
|
|
|
2008-03-08 15:48:41 +08:00
|
|
|
STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP");
|
2006-12-20 05:40:18 +08:00
|
|
|
STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP");
|
|
|
|
STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP");
|
2020-06-19 16:27:52 +08:00
|
|
|
STATISTIC(
|
|
|
|
IPNumInstReplaced,
|
|
|
|
"Number of instructions replaced with (simpler) instruction by IPSCCP");
|
2006-12-20 05:40:18 +08:00
|
|
|
|
[SCCP] Switch to widen at PHIs, stores and call edges.
Currently SCCP does not widen PHIs, stores or along call edges
(arguments/return values), but on operations that directly extend ranges
(like binary operators).
This means PHIs, stores and call edges are not pessimized by widening
currently, while binary operators are. The main reason for widening
operators initially was that opting-out for certain operations was
more straight-forward in the initial implementation (and it did not
matter too much, as range support initially was only implemented for a
very limited set of operations).
During the discussion in D78391, it was suggested to consider flipping
widening to PHIs, stores and along call edges. After adding support for
tracking the number of range extensions in ValueLattice, limiting the
number of range extensions per value is straight forward.
This patch introduces a MaxWidenSteps option to the MergeOptions,
limiting the number of range extensions per value. For PHIs, it seems
natural to allow an extension for each (active) incoming value plus 1. For
the other cases, an arbitrary limit of 10 has been chosen initially. It would
potentially make sense to set it depending on the users of a
function/global, but that still needs investigating. This potentially
leads to more state-changes and longer compile-times.
The results look quite promising (MultiSource, SPEC):
Same hash: 179 (filtered out)
Remaining: 58
Metric: sccp.IPNumInstRemoved
Program base widen-phi diff
test-suite...ks/Prolangs-C/agrep/agrep.test 58.00 82.00 41.4%
test-suite...marks/SciMark2-C/scimark2.test 32.00 43.00 34.4%
test-suite...rks/FreeBench/mason/mason.test 6.00 8.00 33.3%
test-suite...langs-C/football/football.test 104.00 128.00 23.1%
test-suite...cations/hexxagon/hexxagon.test 36.00 42.00 16.7%
test-suite...CFP2000/177.mesa/177.mesa.test 214.00 249.00 16.4%
test-suite...ngs-C/assembler/assembler.test 14.00 16.00 14.3%
test-suite...arks/VersaBench/dbms/dbms.test 10.00 11.00 10.0%
test-suite...oxyApps-C++/miniFE/miniFE.test 43.00 47.00 9.3%
test-suite...ications/JM/ldecod/ldecod.test 179.00 195.00 8.9%
test-suite...CFP2006/433.milc/433.milc.test 249.00 265.00 6.4%
test-suite.../CINT2000/175.vpr/175.vpr.test 98.00 104.00 6.1%
test-suite...peg2/mpeg2dec/mpeg2decode.test 70.00 74.00 5.7%
test-suite...CFP2000/188.ammp/188.ammp.test 71.00 75.00 5.6%
test-suite...ce/Benchmarks/PAQ8p/paq8p.test 111.00 117.00 5.4%
test-suite...ce/Applications/Burg/burg.test 41.00 43.00 4.9%
test-suite...000/197.parser/197.parser.test 66.00 69.00 4.5%
test-suite...tions/lambda-0.1.3/lambda.test 23.00 24.00 4.3%
test-suite...urce/Applications/lua/lua.test 301.00 313.00 4.0%
test-suite...TimberWolfMC/timberwolfmc.test 76.00 79.00 3.9%
test-suite...lications/ClamAV/clamscan.test 991.00 1030.00 3.9%
test-suite...plications/d/make_dparser.test 53.00 55.00 3.8%
test-suite...fice-ispell/office-ispell.test 83.00 86.00 3.6%
test-suite...lications/obsequi/Obsequi.test 28.00 29.00 3.6%
test-suite.../Prolangs-C/bison/mybison.test 56.00 58.00 3.6%
test-suite.../CINT2000/254.gap/254.gap.test 170.00 176.00 3.5%
test-suite.../Applications/lemon/lemon.test 30.00 31.00 3.3%
test-suite.../CINT2000/176.gcc/176.gcc.test 1202.00 1240.00 3.2%
test-suite...pplications/treecc/treecc.test 79.00 81.00 2.5%
test-suite...chmarks/MallocBench/gs/gs.test 357.00 366.00 2.5%
test-suite...eeBench/analyzer/analyzer.test 103.00 105.00 1.9%
test-suite...T2006/445.gobmk/445.gobmk.test 1697.00 1724.00 1.6%
test-suite...006/453.povray/453.povray.test 1812.00 1839.00 1.5%
test-suite.../Benchmarks/Bullet/bullet.test 337.00 342.00 1.5%
test-suite.../CINT2000/252.eon/252.eon.test 426.00 432.00 1.4%
test-suite...T2000/300.twolf/300.twolf.test 214.00 217.00 1.4%
test-suite...pplications/oggenc/oggenc.test 244.00 247.00 1.2%
test-suite.../CINT2006/403.gcc/403.gcc.test 4008.00 4055.00 1.2%
test-suite...T2006/456.hmmer/456.hmmer.test 175.00 177.00 1.1%
test-suite...nal/skidmarks10/skidmarks.test 430.00 434.00 0.9%
test-suite.../Applications/sgefa/sgefa.test 115.00 116.00 0.9%
test-suite...006/447.dealII/447.dealII.test 1082.00 1091.00 0.8%
test-suite...6/482.sphinx3/482.sphinx3.test 141.00 142.00 0.7%
test-suite...ocBench/espresso/espresso.test 152.00 153.00 0.7%
test-suite...3.xalancbmk/483.xalancbmk.test 4003.00 4025.00 0.5%
test-suite...lications/sqlite3/sqlite3.test 548.00 551.00 0.5%
test-suite...marks/7zip/7zip-benchmark.test 5522.00 5551.00 0.5%
test-suite...nsumer-lame/consumer-lame.test 208.00 209.00 0.5%
test-suite...:: External/Povray/povray.test 1556.00 1563.00 0.4%
test-suite...000/186.crafty/186.crafty.test 298.00 299.00 0.3%
test-suite.../Applications/SPASS/SPASS.test 2019.00 2025.00 0.3%
test-suite...ications/JM/lencod/lencod.test 8427.00 8449.00 0.3%
test-suite...6/464.h264ref/464.h264ref.test 6797.00 6813.00 0.2%
test-suite...6/471.omnetpp/471.omnetpp.test 431.00 430.00 -0.2%
test-suite...006/450.soplex/450.soplex.test 446.00 447.00 0.2%
test-suite...0.perlbench/400.perlbench.test 1729.00 1727.00 -0.1%
test-suite...000/255.vortex/255.vortex.test 3815.00 3819.00 0.1%
Reviewers: efriedma, nikic, davide
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D79036
2020-05-29 16:29:39 +08:00
|
|
|
// Upper bound on how many times a value's range may be extended by operations
// that require widening.
static constexpr unsigned MaxNumRangeExtensions = 10;
|
|
|
|
|
|
|
|
/// Returns MergeOptions with MaxWidenSteps set to MaxNumRangeExtensions.
|
|
|
|
static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() {
|
|
|
|
return ValueLatticeElement::MergeOptions().setMaxWidenSteps(
|
|
|
|
MaxNumRangeExtensions);
|
|
|
|
}
|
2002-04-30 05:26:08 +08:00
|
|
|
namespace {
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2020-03-14 00:40:03 +08:00
|
|
|
// Helper to check if \p LV is either a constant or a constant
|
|
|
|
// range with a single element. This should cover exactly the same cases as the
|
2020-03-28 23:20:10 +08:00
|
|
|
// old ValueLatticeElement::isConstant() and is intended to be used in the
|
|
|
|
// transition to ValueLatticeElement.
|
|
|
|
bool isConstant(const ValueLatticeElement &LV) {
|
2020-03-14 00:40:03 +08:00
|
|
|
return LV.isConstant() ||
|
|
|
|
(LV.isConstantRange() && LV.getConstantRange().isSingleElement());
|
|
|
|
}
|
2020-03-13 02:46:16 +08:00
|
|
|
|
2020-03-14 00:40:03 +08:00
|
|
|
// Helper to check if \p LV is either overdefined or a constant range with more
|
|
|
|
// than a single element. This should cover exactly the same cases as the old
|
2020-03-28 23:20:10 +08:00
|
|
|
// ValueLatticeElement::isOverdefined() and is intended to be used in the
|
|
|
|
// transition to ValueLatticeElement.
|
|
|
|
bool isOverdefined(const ValueLatticeElement &LV) {
|
2020-03-14 00:40:03 +08:00
|
|
|
return LV.isOverdefined() ||
|
|
|
|
(LV.isConstantRange() && !LV.getConstantRange().isSingleElement());
|
|
|
|
}
|
2001-06-28 07:38:11 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2004-11-15 12:44:20 +08:00
|
|
|
/// SCCPSolver - This class is a general purpose solver for Sparse Conditional
|
|
|
|
/// Constant Propagation.
|
|
|
|
///
|
|
|
|
class SCCPSolver : public InstVisitor<SCCPSolver> {
|
2015-03-05 02:43:29 +08:00
|
|
|
const DataLayout &DL;
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 11:09:36 +08:00
|
|
|
std::function<const TargetLibraryInfo &(Function &)> GetTLI;
|
2017-10-21 05:47:29 +08:00
|
|
|
SmallPtrSet<BasicBlock *, 8> BBExecutable; // The BBs that are executable.
|
2020-03-28 23:20:10 +08:00
|
|
|
DenseMap<Value *, ValueLatticeElement>
|
|
|
|
ValueState; // The state each value is in.
|
2004-07-16 07:36:43 +08:00
|
|
|
|
2009-11-04 07:40:48 +08:00
|
|
|
/// StructValueState - This maintains ValueState for values that have
|
|
|
|
/// StructType, for example for formal arguments, calls, insertelement, etc.
|
2020-03-28 23:20:10 +08:00
|
|
|
DenseMap<std::pair<Value *, unsigned>, ValueLatticeElement> StructValueState;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2004-12-11 13:15:59 +08:00
|
|
|
/// TrackedGlobals - If we are tracking any values for the contents of a global
|
|
|
|
/// variable, we keep a mapping from the constant accessor to the element of
|
|
|
|
/// the global, to the currently known value. If the value becomes
|
|
|
|
/// overdefined, its entry is simply removed from this map.
|
2020-03-28 23:20:10 +08:00
|
|
|
DenseMap<GlobalVariable *, ValueLatticeElement> TrackedGlobals;
|
2004-12-11 13:15:59 +08:00
|
|
|
|
2008-03-11 13:46:42 +08:00
|
|
|
/// TrackedRetVals - If we are tracking arguments into and the return
|
2004-12-10 16:02:06 +08:00
|
|
|
/// value out of a function, it will have an entry in this map, indicating
|
|
|
|
/// what the known return value for the function is.
|
2020-03-28 23:20:10 +08:00
|
|
|
MapVector<Function *, ValueLatticeElement> TrackedRetVals;
|
2008-03-11 13:46:42 +08:00
|
|
|
|
|
|
|
/// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
|
|
|
|
/// that return multiple values.
|
2020-03-28 23:20:10 +08:00
|
|
|
MapVector<std::pair<Function *, unsigned>, ValueLatticeElement>
|
|
|
|
TrackedMultipleRetVals;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-04 07:40:48 +08:00
|
|
|
/// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
|
|
|
|
/// represented here for efficient lookup.
|
2017-10-21 05:47:29 +08:00
|
|
|
SmallPtrSet<Function *, 16> MRVFunctionsTracked;
|
2004-12-10 16:02:06 +08:00
|
|
|
|
2018-03-01 09:19:18 +08:00
|
|
|
/// MustTailCallees - Each function here is a callee of non-removable
|
|
|
|
/// musttail call site.
|
|
|
|
SmallPtrSet<Function *, 16> MustTailCallees;
|
|
|
|
|
2009-11-04 04:52:57 +08:00
|
|
|
/// TrackingIncomingArguments - This is the set of functions for whose
|
|
|
|
/// arguments we make optimistic assumptions about and try to prove as
|
|
|
|
/// constants.
|
2017-10-21 05:47:29 +08:00
|
|
|
SmallPtrSet<Function *, 16> TrackingIncomingArguments;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-02 11:03:42 +08:00
|
|
|
/// The reason for two worklists is that overdefined is the lowest state
|
|
|
|
/// on the lattice, and moving things to overdefined as fast as possible
|
|
|
|
/// makes SCCP converge much faster.
|
|
|
|
///
|
|
|
|
/// By having a separate worklist, we accomplish this because everything
|
|
|
|
/// possibly overdefined will become overdefined at the soonest possible
|
|
|
|
/// point.
|
2017-10-21 05:47:29 +08:00
|
|
|
SmallVector<Value *, 64> OverdefinedInstWorkList;
|
|
|
|
SmallVector<Value *, 64> InstWorkList;
|
2004-07-16 07:36:43 +08:00
|
|
|
|
2017-10-21 05:47:29 +08:00
|
|
|
// The BasicBlock work list
|
|
|
|
SmallVector<BasicBlock *, 64> BBWorkList;
|
2003-10-09 00:55:34 +08:00
|
|
|
|
|
|
|
/// KnownFeasibleEdges - Entries in this set are edges which have already had
|
|
|
|
/// PHI nodes retriggered.
|
2017-10-21 05:47:29 +08:00
|
|
|
using Edge = std::pair<BasicBlock *, BasicBlock *>;
|
2008-08-24 07:36:38 +08:00
|
|
|
DenseSet<Edge> KnownFeasibleEdges;
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2018-11-09 19:52:27 +08:00
|
|
|
DenseMap<Function *, AnalysisResultsForFn> AnalysisResults;
|
2018-08-23 19:04:00 +08:00
|
|
|
DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers;
|
|
|
|
|
2020-03-14 00:40:03 +08:00
|
|
|
LLVMContext &Ctx;
|
|
|
|
|
2002-04-30 05:26:08 +08:00
|
|
|
public:
|
2018-11-09 19:52:27 +08:00
|
|
|
/// Registers the analysis results \p A for function \p F by inserting them
/// into AnalysisResults, keyed by the function's address.
void addAnalysis(Function &F, AnalysisResultsForFn A) {
  AnalysisResults.insert(std::make_pair(&F, std::move(A)));
}
|
|
|
|
|
|
|
|
/// Returns what the PredInfo registered for the function containing \p I
/// reports for \p I, or nullptr when no analysis results are registered for
/// that function.
const PredicateBase *getPredicateInfoFor(Instruction *I) {
  auto Entry = AnalysisResults.find(I->getParent()->getParent());
  return Entry == AnalysisResults.end()
             ? nullptr
             : Entry->second.PredInfo->getPredicateInfoFor(I);
}
|
|
|
|
|
2018-11-12 04:22:45 +08:00
|
|
|
/// Builds a DomTreeUpdater from the DT and PDT recorded for \p F, using the
/// Lazy update strategy. Analysis results must already be registered for
/// \p F; this is asserted.
DomTreeUpdater getDTU(Function &F) {
  auto Entry = AnalysisResults.find(&F);
  assert(Entry != AnalysisResults.end() &&
         "Need analysis results for function.");
  auto &Results = Entry->second;
  return {Results.DT, Results.PDT, DomTreeUpdater::UpdateStrategy::Lazy};
}
|
|
|
|
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 11:09:36 +08:00
|
|
|
SCCPSolver(const DataLayout &DL,
|
2020-03-14 00:40:03 +08:00
|
|
|
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
|
|
|
|
LLVMContext &Ctx)
|
|
|
|
: DL(DL), GetTLI(std::move(GetTLI)), Ctx(Ctx) {}
|
2001-06-28 07:38:11 +08:00
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
/// MarkBlockExecutable - This method can be used by clients to mark all of
|
|
|
|
/// the blocks that are known to be intrinsically live in the processed unit.
|
2009-11-02 14:11:23 +08:00
|
|
|
///
|
|
|
|
/// This returns true if the block was not considered live before.
|
|
|
|
bool MarkBlockExecutable(BasicBlock *BB) {
|
2014-11-19 15:49:26 +08:00
|
|
|
if (!BBExecutable.insert(BB).second)
|
|
|
|
return false;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << '\n');
|
2004-11-15 12:44:20 +08:00
|
|
|
BBWorkList.push_back(BB); // Add the block to the work list!
|
2009-11-02 14:11:23 +08:00
|
|
|
return true;
|
2004-11-15 12:44:20 +08:00
|
|
|
}
|
2001-06-28 07:38:11 +08:00
|
|
|
|
2004-12-11 13:15:59 +08:00
|
|
|
/// TrackValueOfGlobalVariable - Clients can use this method to
|
2004-12-10 16:02:06 +08:00
|
|
|
/// inform the SCCPSolver that it should track loads and stores to the
|
|
|
|
/// specified global variable if it can. This is only legal to call if
|
|
|
|
/// performing Interprocedural SCCP.
|
2004-12-11 13:15:59 +08:00
|
|
|
void TrackValueOfGlobalVariable(GlobalVariable *GV) {
|
2009-11-04 07:40:48 +08:00
|
|
|
// We only track the contents of scalar globals.
|
2016-01-17 04:30:46 +08:00
|
|
|
if (GV->getValueType()->isSingleValueType()) {
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement &IV = TrackedGlobals[GV];
|
2004-12-11 13:15:59 +08:00
|
|
|
if (!isa<UndefValue>(GV->getInitializer()))
|
|
|
|
IV.markConstant(GV->getInitializer());
|
|
|
|
}
|
|
|
|
}
|
2004-12-10 16:02:06 +08:00
|
|
|
|
|
|
|
/// AddTrackedFunction - If the SCCP solver is supposed to track calls into
|
|
|
|
/// and out of the specified function (which cannot have its address taken),
|
|
|
|
/// this method must be called.
|
|
|
|
void AddTrackedFunction(Function *F) {
|
|
|
|
// Add an entry, F -> undef.
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
|
2009-11-04 07:40:48 +08:00
|
|
|
MRVFunctionsTracked.insert(F);
|
2008-03-11 13:46:42 +08:00
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
|
2020-03-28 23:20:10 +08:00
|
|
|
TrackedMultipleRetVals.insert(
|
|
|
|
std::make_pair(std::make_pair(F, i), ValueLatticeElement()));
|
2008-04-23 13:38:20 +08:00
|
|
|
} else
|
2020-03-28 23:20:10 +08:00
|
|
|
TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement()));
|
2004-12-10 16:02:06 +08:00
|
|
|
}
|
|
|
|
|
2018-03-01 09:19:18 +08:00
|
|
|
/// AddMustTailCallee - If the SCCP solver finds that this function is called
|
|
|
|
/// from non-removable musttail call site.
|
|
|
|
void AddMustTailCallee(Function *F) {
|
|
|
|
MustTailCallees.insert(F);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if the given function is called from non-removable musttail
|
|
|
|
/// call site.
|
|
|
|
bool isMustTailCallee(Function *F) {
|
|
|
|
return MustTailCallees.count(F);
|
|
|
|
}
|
|
|
|
|
2009-11-04 03:24:51 +08:00
|
|
|
/// Adds \p F to the set of functions whose incoming arguments are tracked.
void AddArgumentTrackedFunction(Function *F) {
  TrackingIncomingArguments.insert(F);
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2017-10-14 01:53:44 +08:00
|
|
|
/// Returns true if the given function is in the solver's set of
|
|
|
|
/// argument-tracked functions.
|
|
|
|
bool isArgumentTrackedFunction(Function *F) {
|
|
|
|
return TrackingIncomingArguments.count(F);
|
|
|
|
}
|
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
/// Solve - Solve for constants and executable blocks.
|
|
|
|
void Solve();
|
|
|
|
|
2006-12-20 14:21:33 +08:00
|
|
|
/// ResolvedUndefsIn - While solving the dataflow for a function, we assume
|
2004-12-11 04:41:50 +08:00
|
|
|
/// that branches on undef values cannot reach any of their successors.
|
|
|
|
/// However, this is not a safe assumption. After we solve dataflow, this
|
|
|
|
/// method should be used to handle this. If this returns true, the solver
|
|
|
|
/// should be rerun.
|
2006-12-20 14:21:33 +08:00
|
|
|
bool ResolvedUndefsIn(Function &F);
|
2004-12-11 04:41:50 +08:00
|
|
|
|
2008-08-24 07:39:31 +08:00
|
|
|
/// Returns true if \p BB is currently in the set of executable blocks.
bool isBlockExecutable(BasicBlock *BB) const {
  return BBExecutable.count(BB) != 0;
}
|
2001-06-28 07:38:11 +08:00
|
|
|
|
2018-07-20 07:02:07 +08:00
|
|
|
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
|
|
|
|
// block to the 'To' basic block is currently feasible.
|
|
|
|
bool isEdgeFeasible(BasicBlock *From, BasicBlock *To);
|
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
/// Returns a copy of the lattice values recorded for each element of the
/// struct-typed value \p V, in element order.
///
/// \p V must have struct type, and every element must already have an entry
/// in StructValueState; both conditions are asserted.
std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const {
  auto *STy = dyn_cast<StructType>(V->getType());
  assert(STy && "getStructLatticeValueFor() can be called only on structs");

  const unsigned NumElts = STy->getNumElements();
  std::vector<ValueLatticeElement> StructValues;
  // The final size is known up front, so allocate once instead of growing.
  StructValues.reserve(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    auto I = StructValueState.find(std::make_pair(V, i));
    assert(I != StructValueState.end() && "Value not in valuemap!");
    StructValues.push_back(I->second);
  }
  return StructValues;
}
|
|
|
|
|
2020-06-19 16:27:52 +08:00
|
|
|
/// Erases the ValueState entry for \p V.
void removeLatticeValueFor(Value *V) {
  ValueState.erase(V);
}
|
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
/// Returns the lattice value recorded in ValueState for the non-struct value
/// \p V.
///
/// \p V must not have struct type (use getStructLatticeValueFor for those)
/// and must already have an entry in ValueState; both conditions are
/// asserted.
const ValueLatticeElement &getLatticeValueFor(Value *V) const {
  assert(!V->getType()->isStructTy() &&
         "Should use getStructLatticeValueFor");
  auto I = ValueState.find(V);
  // Only ValueState is consulted here, so the message names only that map.
  assert(I != ValueState.end() && "V not found in ValueState map!");
  return I->second;
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2008-03-11 13:46:42 +08:00
|
|
|
/// getTrackedRetVals - Get the inferred return value map.
|
2020-03-28 23:20:10 +08:00
|
|
|
const MapVector<Function *, ValueLatticeElement> &getTrackedRetVals() {
|
2008-03-11 13:46:42 +08:00
|
|
|
return TrackedRetVals;
|
2004-12-11 10:53:57 +08:00
|
|
|
}
|
|
|
|
|
2004-12-11 13:15:59 +08:00
|
|
|
/// getTrackedGlobals - Get and return the set of inferred initializers for
|
|
|
|
/// global variables.
|
2020-03-28 23:20:10 +08:00
|
|
|
const DenseMap<GlobalVariable *, ValueLatticeElement> &getTrackedGlobals() {
|
2004-12-11 13:15:59 +08:00
|
|
|
return TrackedGlobals;
|
|
|
|
}
|
|
|
|
|
2016-07-21 04:17:13 +08:00
|
|
|
/// getMRVFunctionsTracked - Get the set of functions which return multiple
|
|
|
|
/// values tracked by the pass.
|
|
|
|
const SmallPtrSet<Function *, 16> getMRVFunctionsTracked() {
|
|
|
|
return MRVFunctionsTracked;
|
|
|
|
}
|
|
|
|
|
2018-03-01 09:19:18 +08:00
|
|
|
/// getMustTailCallees - Get the set of functions which are called
|
|
|
|
/// from non-removable musttail call sites.
|
|
|
|
const SmallPtrSet<Function *, 16> getMustTailCallees() {
|
|
|
|
return MustTailCallees;
|
|
|
|
}
|
|
|
|
|
2017-03-08 09:26:37 +08:00
|
|
|
/// markOverdefined - Mark the specified value overdefined. This
|
2009-11-04 07:40:48 +08:00
|
|
|
/// works with both scalars and structs.
|
2017-03-08 09:26:37 +08:00
|
|
|
void markOverdefined(Value *V) {
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *STy = dyn_cast<StructType>(V->getType()))
|
2009-11-04 07:40:48 +08:00
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
|
|
|
|
markOverdefined(getStructValueState(V, i), V);
|
|
|
|
else
|
2017-03-08 09:26:37 +08:00
|
|
|
markOverdefined(ValueState[V], V);
|
2009-11-04 07:40:48 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2016-07-21 04:17:13 +08:00
|
|
|
// isStructLatticeConstant - Return true if all the lattice values
|
2020-03-14 00:40:03 +08:00
|
|
|
// corresponding to elements of the structure are constants,
|
2016-07-21 04:17:13 +08:00
|
|
|
// false otherwise.
|
|
|
|
bool isStructLatticeConstant(Function *F, StructType *STy) {
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i));
|
|
|
|
assert(It != TrackedMultipleRetVals.end());
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement LV = It->second;
|
2020-03-14 00:40:03 +08:00
|
|
|
if (!isConstant(LV))
|
2016-07-21 04:17:13 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-14 00:40:03 +08:00
|
|
|
/// Helper to return a Constant if \p LV is either a constant or a constant
|
|
|
|
/// range with a single element.
|
2020-03-28 23:20:10 +08:00
|
|
|
Constant *getConstant(const ValueLatticeElement &LV) const {
|
2020-03-14 00:40:03 +08:00
|
|
|
if (LV.isConstant())
|
|
|
|
return LV.getConstant();
|
|
|
|
|
|
|
|
if (LV.isConstantRange()) {
|
|
|
|
auto &CR = LV.getConstantRange();
|
|
|
|
if (CR.getSingleElement())
|
|
|
|
return ConstantInt::get(Ctx, *CR.getSingleElement());
|
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2001-06-28 07:38:11 +08:00
|
|
|
private:
|
2020-03-28 23:20:10 +08:00
|
|
|
/// Like getConstant(), but only yields a result when the constant is a
/// ConstantInt; returns nullptr for anything else (including no constant).
ConstantInt *getConstantInt(const ValueLatticeElement &IV) const {
  Constant *MaybeConst = getConstant(IV);
  return dyn_cast_or_null<ConstantInt>(MaybeConst);
}
|
|
|
|
|
2020-02-14 07:05:50 +08:00
|
|
|
// pushToWorkList - Helper for markConstant/markOverdefined
|
2020-03-28 23:20:10 +08:00
|
|
|
void pushToWorkList(ValueLatticeElement &IV, Value *V) {
|
2020-03-14 05:30:28 +08:00
|
|
|
if (IV.isOverdefined())
|
2020-03-14 00:40:03 +08:00
|
|
|
return OverdefinedInstWorkList.push_back(V);
|
|
|
|
InstWorkList.push_back(V);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Helper to push \p V to the worklist, after updating it to \p IV. Also
|
|
|
|
// prints a debug message with the updated value.
|
2020-03-28 23:20:10 +08:00
|
|
|
void pushToWorkListMsg(ValueLatticeElement &IV, Value *V) {
|
2020-03-14 00:40:03 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "updated " << IV << ": " << *V << '\n');
|
2020-05-04 17:17:24 +08:00
|
|
|
pushToWorkList(IV, V);
|
2016-07-14 03:33:25 +08:00
|
|
|
}
|
|
|
|
|
2004-07-16 07:36:43 +08:00
|
|
|
// markConstant - Make a value be marked as "constant". If the value
|
2005-04-22 07:48:37 +08:00
|
|
|
// is not already a constant, add it to the instruction work list so that
|
2001-06-28 07:38:11 +08:00
|
|
|
// the users of the instruction are updated later.
|
2020-04-14 18:15:20 +08:00
|
|
|
bool markConstant(ValueLatticeElement &IV, Value *V, Constant *C,
|
|
|
|
bool MayIncludeUndef = false) {
|
|
|
|
if (!IV.markConstant(C, MayIncludeUndef))
|
|
|
|
return false;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
|
2016-07-14 03:33:25 +08:00
|
|
|
pushToWorkList(IV, V);
|
2018-07-20 21:29:12 +08:00
|
|
|
return true;
|
2003-10-09 00:21:03 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2018-07-20 21:29:12 +08:00
|
|
|
/// Convenience overload for non-struct values: looks up (creating it if
/// needed) the ValueState entry for \p V and marks it as the constant \p C.
bool markConstant(Value *V, Constant *C) {
  assert(!V->getType()->isStructTy() && "structs should use mergeInValue");
  ValueLatticeElement &State = ValueState[V];
  return markConstant(State, V, C);
}
|
|
|
|
|
2004-07-16 07:36:43 +08:00
|
|
|
// markOverdefined - Make a value be marked as "overdefined". If the
|
2005-04-22 07:48:37 +08:00
|
|
|
// value is not already overdefined, add it to the overdefined instruction
|
2004-07-16 07:36:43 +08:00
|
|
|
// work list so that the users of the instruction are updated later.
|
2020-03-28 23:20:10 +08:00
|
|
|
bool markOverdefined(ValueLatticeElement &IV, Value *V) {
|
2018-07-20 21:29:12 +08:00
|
|
|
if (!IV.markOverdefined()) return false;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "markOverdefined: ";
|
|
|
|
if (auto *F = dyn_cast<Function>(V)) dbgs()
|
|
|
|
<< "Function '" << F->getName() << "'\n";
|
|
|
|
else dbgs() << *V << '\n');
|
2009-11-02 11:03:42 +08:00
|
|
|
// Only instructions go on the work list
|
2016-12-12 05:19:03 +08:00
|
|
|
pushToWorkList(IV, V);
|
2018-07-20 21:29:12 +08:00
|
|
|
return true;
|
2003-10-09 00:21:03 +08:00
|
|
|
}
|
2004-12-10 16:02:06 +08:00
|
|
|
|
2020-04-19 15:56:08 +08:00
|
|
|
/// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV
|
|
|
|
/// changes.
|
2020-03-28 23:20:10 +08:00
|
|
|
bool mergeInValue(ValueLatticeElement &IV, Value *V,
|
2020-04-19 15:56:08 +08:00
|
|
|
ValueLatticeElement MergeWithV,
|
|
|
|
ValueLatticeElement::MergeOptions Opts = {
|
[SCCP] Switch to widen at PHIs, stores and call edges.
Currently SCCP does not widen PHIs, stores or along call edges
(arguments/return values), but on operations that directly extend ranges
(like binary operators).
This means PHIs, stores and call edges are not pessimized by widening
currently, while binary operators are. The main reason for widening
operators initially was that opting-out for certain operations was
more straight-forward in the initial implementation (and it did not
matter too much, as range support initially was only implemented for a
very limited set of operations.
During the discussion in D78391, it was suggested to consider flipping
widening to PHIs, stores and along call edges. After adding support for
tracking the number of range extensions in ValueLattice, limiting the
number of range extensions per value is straight forward.
This patch introduces a MaxWidenSteps option to the MergeOptions,
limiting the number of range extensions per value. For PHIs, it seems
natural allow an extension for each (active) incoming value plus 1. For
the other cases, a arbitrary limit of 10 has been chosen initially. It would
potentially make sense to set it depending on the users of a
function/global, but that still needs investigating. This potentially
leads to more state-changes and longer compile-times.
The results look quite promising (MultiSource, SPEC):
Same hash: 179 (filtered out)
Remaining: 58
Metric: sccp.IPNumInstRemoved
Program base widen-phi diff
test-suite...ks/Prolangs-C/agrep/agrep.test 58.00 82.00 41.4%
test-suite...marks/SciMark2-C/scimark2.test 32.00 43.00 34.4%
test-suite...rks/FreeBench/mason/mason.test 6.00 8.00 33.3%
test-suite...langs-C/football/football.test 104.00 128.00 23.1%
test-suite...cations/hexxagon/hexxagon.test 36.00 42.00 16.7%
test-suite...CFP2000/177.mesa/177.mesa.test 214.00 249.00 16.4%
test-suite...ngs-C/assembler/assembler.test 14.00 16.00 14.3%
test-suite...arks/VersaBench/dbms/dbms.test 10.00 11.00 10.0%
test-suite...oxyApps-C++/miniFE/miniFE.test 43.00 47.00 9.3%
test-suite...ications/JM/ldecod/ldecod.test 179.00 195.00 8.9%
test-suite...CFP2006/433.milc/433.milc.test 249.00 265.00 6.4%
test-suite.../CINT2000/175.vpr/175.vpr.test 98.00 104.00 6.1%
test-suite...peg2/mpeg2dec/mpeg2decode.test 70.00 74.00 5.7%
test-suite...CFP2000/188.ammp/188.ammp.test 71.00 75.00 5.6%
test-suite...ce/Benchmarks/PAQ8p/paq8p.test 111.00 117.00 5.4%
test-suite...ce/Applications/Burg/burg.test 41.00 43.00 4.9%
test-suite...000/197.parser/197.parser.test 66.00 69.00 4.5%
test-suite...tions/lambda-0.1.3/lambda.test 23.00 24.00 4.3%
test-suite...urce/Applications/lua/lua.test 301.00 313.00 4.0%
test-suite...TimberWolfMC/timberwolfmc.test 76.00 79.00 3.9%
test-suite...lications/ClamAV/clamscan.test 991.00 1030.00 3.9%
test-suite...plications/d/make_dparser.test 53.00 55.00 3.8%
test-suite...fice-ispell/office-ispell.test 83.00 86.00 3.6%
test-suite...lications/obsequi/Obsequi.test 28.00 29.00 3.6%
test-suite.../Prolangs-C/bison/mybison.test 56.00 58.00 3.6%
test-suite.../CINT2000/254.gap/254.gap.test 170.00 176.00 3.5%
test-suite.../Applications/lemon/lemon.test 30.00 31.00 3.3%
test-suite.../CINT2000/176.gcc/176.gcc.test 1202.00 1240.00 3.2%
test-suite...pplications/treecc/treecc.test 79.00 81.00 2.5%
test-suite...chmarks/MallocBench/gs/gs.test 357.00 366.00 2.5%
test-suite...eeBench/analyzer/analyzer.test 103.00 105.00 1.9%
test-suite...T2006/445.gobmk/445.gobmk.test 1697.00 1724.00 1.6%
test-suite...006/453.povray/453.povray.test 1812.00 1839.00 1.5%
test-suite.../Benchmarks/Bullet/bullet.test 337.00 342.00 1.5%
test-suite.../CINT2000/252.eon/252.eon.test 426.00 432.00 1.4%
test-suite...T2000/300.twolf/300.twolf.test 214.00 217.00 1.4%
test-suite...pplications/oggenc/oggenc.test 244.00 247.00 1.2%
test-suite.../CINT2006/403.gcc/403.gcc.test 4008.00 4055.00 1.2%
test-suite...T2006/456.hmmer/456.hmmer.test 175.00 177.00 1.1%
test-suite...nal/skidmarks10/skidmarks.test 430.00 434.00 0.9%
test-suite.../Applications/sgefa/sgefa.test 115.00 116.00 0.9%
test-suite...006/447.dealII/447.dealII.test 1082.00 1091.00 0.8%
test-suite...6/482.sphinx3/482.sphinx3.test 141.00 142.00 0.7%
test-suite...ocBench/espresso/espresso.test 152.00 153.00 0.7%
test-suite...3.xalancbmk/483.xalancbmk.test 4003.00 4025.00 0.5%
test-suite...lications/sqlite3/sqlite3.test 548.00 551.00 0.5%
test-suite...marks/7zip/7zip-benchmark.test 5522.00 5551.00 0.5%
test-suite...nsumer-lame/consumer-lame.test 208.00 209.00 0.5%
test-suite...:: External/Povray/povray.test 1556.00 1563.00 0.4%
test-suite...000/186.crafty/186.crafty.test 298.00 299.00 0.3%
test-suite.../Applications/SPASS/SPASS.test 2019.00 2025.00 0.3%
test-suite...ications/JM/lencod/lencod.test 8427.00 8449.00 0.3%
test-suite...6/464.h264ref/464.h264ref.test 6797.00 6813.00 0.2%
test-suite...6/471.omnetpp/471.omnetpp.test 431.00 430.00 -0.2%
test-suite...006/450.soplex/450.soplex.test 446.00 447.00 0.2%
test-suite...0.perlbench/400.perlbench.test 1729.00 1727.00 -0.1%
test-suite...000/255.vortex/255.vortex.test 3815.00 3819.00 0.1%
Reviewers: efriedma, nikic, davide
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D79036
2020-05-29 16:29:39 +08:00
|
|
|
/*MayIncludeUndef=*/false, /*CheckWiden=*/false}) {
|
2020-04-19 15:56:08 +08:00
|
|
|
if (IV.mergeIn(MergeWithV, Opts)) {
|
2020-03-14 00:40:03 +08:00
|
|
|
pushToWorkList(IV, V);
|
|
|
|
LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : "
|
|
|
|
<< IV << "\n");
|
|
|
|
return true;
|
|
|
|
}
|
2018-07-20 21:29:12 +08:00
|
|
|
return false;
|
2001-06-28 07:38:11 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
bool mergeInValue(Value *V, ValueLatticeElement MergeWithV,
|
2020-04-19 15:56:08 +08:00
|
|
|
ValueLatticeElement::MergeOptions Opts = {
|
[SCCP] Switch to widen at PHIs, stores and call edges.
Currently SCCP does not widen PHIs, stores or along call edges
(arguments/return values), but on operations that directly extend ranges
(like binary operators).
This means PHIs, stores and call edges are not pessimized by widening
currently, while binary operators are. The main reason for widening
operators initially was that opting-out for certain operations was
more straight-forward in the initial implementation (and it did not
matter too much, as range support initially was only implemented for a
very limited set of operations.
During the discussion in D78391, it was suggested to consider flipping
widening to PHIs, stores and along call edges. After adding support for
tracking the number of range extensions in ValueLattice, limiting the
number of range extensions per value is straight forward.
This patch introduces a MaxWidenSteps option to the MergeOptions,
limiting the number of range extensions per value. For PHIs, it seems
natural allow an extension for each (active) incoming value plus 1. For
the other cases, a arbitrary limit of 10 has been chosen initially. It would
potentially make sense to set it depending on the users of a
function/global, but that still needs investigating. This potentially
leads to more state-changes and longer compile-times.
The results look quite promising (MultiSource, SPEC):
Same hash: 179 (filtered out)
Remaining: 58
Metric: sccp.IPNumInstRemoved
Program base widen-phi diff
test-suite...ks/Prolangs-C/agrep/agrep.test 58.00 82.00 41.4%
test-suite...marks/SciMark2-C/scimark2.test 32.00 43.00 34.4%
test-suite...rks/FreeBench/mason/mason.test 6.00 8.00 33.3%
test-suite...langs-C/football/football.test 104.00 128.00 23.1%
test-suite...cations/hexxagon/hexxagon.test 36.00 42.00 16.7%
test-suite...CFP2000/177.mesa/177.mesa.test 214.00 249.00 16.4%
test-suite...ngs-C/assembler/assembler.test 14.00 16.00 14.3%
test-suite...arks/VersaBench/dbms/dbms.test 10.00 11.00 10.0%
test-suite...oxyApps-C++/miniFE/miniFE.test 43.00 47.00 9.3%
test-suite...ications/JM/ldecod/ldecod.test 179.00 195.00 8.9%
test-suite...CFP2006/433.milc/433.milc.test 249.00 265.00 6.4%
test-suite.../CINT2000/175.vpr/175.vpr.test 98.00 104.00 6.1%
test-suite...peg2/mpeg2dec/mpeg2decode.test 70.00 74.00 5.7%
test-suite...CFP2000/188.ammp/188.ammp.test 71.00 75.00 5.6%
test-suite...ce/Benchmarks/PAQ8p/paq8p.test 111.00 117.00 5.4%
test-suite...ce/Applications/Burg/burg.test 41.00 43.00 4.9%
test-suite...000/197.parser/197.parser.test 66.00 69.00 4.5%
test-suite...tions/lambda-0.1.3/lambda.test 23.00 24.00 4.3%
test-suite...urce/Applications/lua/lua.test 301.00 313.00 4.0%
test-suite...TimberWolfMC/timberwolfmc.test 76.00 79.00 3.9%
test-suite...lications/ClamAV/clamscan.test 991.00 1030.00 3.9%
test-suite...plications/d/make_dparser.test 53.00 55.00 3.8%
test-suite...fice-ispell/office-ispell.test 83.00 86.00 3.6%
test-suite...lications/obsequi/Obsequi.test 28.00 29.00 3.6%
test-suite.../Prolangs-C/bison/mybison.test 56.00 58.00 3.6%
test-suite.../CINT2000/254.gap/254.gap.test 170.00 176.00 3.5%
test-suite.../Applications/lemon/lemon.test 30.00 31.00 3.3%
test-suite.../CINT2000/176.gcc/176.gcc.test 1202.00 1240.00 3.2%
test-suite...pplications/treecc/treecc.test 79.00 81.00 2.5%
test-suite...chmarks/MallocBench/gs/gs.test 357.00 366.00 2.5%
test-suite...eeBench/analyzer/analyzer.test 103.00 105.00 1.9%
test-suite...T2006/445.gobmk/445.gobmk.test 1697.00 1724.00 1.6%
test-suite...006/453.povray/453.povray.test 1812.00 1839.00 1.5%
test-suite.../Benchmarks/Bullet/bullet.test 337.00 342.00 1.5%
test-suite.../CINT2000/252.eon/252.eon.test 426.00 432.00 1.4%
test-suite...T2000/300.twolf/300.twolf.test 214.00 217.00 1.4%
test-suite...pplications/oggenc/oggenc.test 244.00 247.00 1.2%
test-suite.../CINT2006/403.gcc/403.gcc.test 4008.00 4055.00 1.2%
test-suite...T2006/456.hmmer/456.hmmer.test 175.00 177.00 1.1%
test-suite...nal/skidmarks10/skidmarks.test 430.00 434.00 0.9%
test-suite.../Applications/sgefa/sgefa.test 115.00 116.00 0.9%
test-suite...006/447.dealII/447.dealII.test 1082.00 1091.00 0.8%
test-suite...6/482.sphinx3/482.sphinx3.test 141.00 142.00 0.7%
test-suite...ocBench/espresso/espresso.test 152.00 153.00 0.7%
test-suite...3.xalancbmk/483.xalancbmk.test 4003.00 4025.00 0.5%
test-suite...lications/sqlite3/sqlite3.test 548.00 551.00 0.5%
test-suite...marks/7zip/7zip-benchmark.test 5522.00 5551.00 0.5%
test-suite...nsumer-lame/consumer-lame.test 208.00 209.00 0.5%
test-suite...:: External/Povray/povray.test 1556.00 1563.00 0.4%
test-suite...000/186.crafty/186.crafty.test 298.00 299.00 0.3%
test-suite.../Applications/SPASS/SPASS.test 2019.00 2025.00 0.3%
test-suite...ications/JM/lencod/lencod.test 8427.00 8449.00 0.3%
test-suite...6/464.h264ref/464.h264ref.test 6797.00 6813.00 0.2%
test-suite...6/471.omnetpp/471.omnetpp.test 431.00 430.00 -0.2%
test-suite...006/450.soplex/450.soplex.test 446.00 447.00 0.2%
test-suite...0.perlbench/400.perlbench.test 1729.00 1727.00 -0.1%
test-suite...000/255.vortex/255.vortex.test 3815.00 3819.00 0.1%
Reviewers: efriedma, nikic, davide
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D79036
2020-05-29 16:29:39 +08:00
|
|
|
/*MayIncludeUndef=*/false, /*CheckWiden=*/false}) {
|
2016-07-20 02:31:07 +08:00
|
|
|
assert(!V->getType()->isStructTy() &&
|
|
|
|
"non-structs should use markConstant");
|
2020-04-19 15:56:08 +08:00
|
|
|
return mergeInValue(ValueState[V], V, MergeWithV, Opts);
|
2006-02-08 10:38:11 +08:00
|
|
|
}
|
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
/// getValueState - Return the ValueLatticeElement object that corresponds to
|
|
|
|
/// the value. This function handles the case when the value hasn't been seen
|
|
|
|
/// yet by properly seeding constants etc.
|
|
|
|
ValueLatticeElement &getValueState(Value *V) {
|
2010-02-16 19:11:14 +08:00
|
|
|
assert(!V->getType()->isStructTy() && "Should use getStructValueState");
|
2004-10-17 02:09:41 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
auto I = ValueState.insert(std::make_pair(V, ValueLatticeElement()));
|
|
|
|
ValueLatticeElement &LV = I.first->second;
|
2009-11-05 22:33:27 +08:00
|
|
|
|
|
|
|
if (!I.second)
|
|
|
|
return LV; // Common case, already in the map.
|
2009-11-02 11:21:36 +08:00
|
|
|
|
2020-03-15 00:50:09 +08:00
|
|
|
if (auto *C = dyn_cast<Constant>(V))
|
|
|
|
LV.markConstant(C); // Constants are constant
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-03-15 00:50:09 +08:00
|
|
|
// All others are unknown by default.
|
2009-11-02 11:21:36 +08:00
|
|
|
return LV;
|
2001-06-28 07:38:11 +08:00
|
|
|
}
|
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
/// getStructValueState - Return the ValueLatticeElement object that
|
|
|
|
/// corresponds to the value/field pair. This function handles the case when
|
|
|
|
/// the value hasn't been seen yet by properly seeding constants etc.
|
|
|
|
ValueLatticeElement &getStructValueState(Value *V, unsigned i) {
|
2010-02-16 19:11:14 +08:00
|
|
|
assert(V->getType()->isStructTy() && "Should use getValueState");
|
2009-11-04 07:40:48 +08:00
|
|
|
assert(i < cast<StructType>(V->getType())->getNumElements() &&
|
|
|
|
"Invalid element #");
|
2009-11-05 22:33:27 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
auto I = StructValueState.insert(
|
|
|
|
std::make_pair(std::make_pair(V, i), ValueLatticeElement()));
|
|
|
|
ValueLatticeElement &LV = I.first->second;
|
2009-11-05 22:33:27 +08:00
|
|
|
|
|
|
|
if (!I.second)
|
|
|
|
return LV; // Common case, already in the map.
|
|
|
|
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *C = dyn_cast<Constant>(V)) {
|
2012-01-26 10:32:04 +08:00
|
|
|
Constant *Elt = C->getAggregateElement(i);
|
2012-07-24 18:51:42 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!Elt)
|
2009-11-04 07:40:48 +08:00
|
|
|
LV.markOverdefined(); // Unknown sort of constant.
|
2012-01-26 10:32:04 +08:00
|
|
|
else if (isa<UndefValue>(Elt))
|
2016-07-10 08:35:15 +08:00
|
|
|
; // Undef values remain unknown.
|
2012-01-26 10:32:04 +08:00
|
|
|
else
|
|
|
|
LV.markConstant(Elt); // Constants are constant.
|
2009-11-04 07:40:48 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-04 07:40:48 +08:00
|
|
|
// All others are underdefined by default.
|
|
|
|
return LV;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-02 13:55:40 +08:00
|
|
|
/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
|
|
|
|
/// work list if it is not already executable.
|
2018-07-20 07:02:07 +08:00
|
|
|
bool markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
|
2003-10-09 00:55:34 +08:00
|
|
|
if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
|
2018-07-20 07:02:07 +08:00
|
|
|
return false; // This edge is already known to be executable!
|
2003-10-09 00:55:34 +08:00
|
|
|
|
2009-11-02 14:11:23 +08:00
|
|
|
if (!MarkBlockExecutable(Dest)) {
|
|
|
|
// If the destination is already executable, we just made an *edge*
|
2003-10-09 00:56:11 +08:00
|
|
|
// feasible that wasn't before. Revisit the PHI nodes in the block
|
|
|
|
// because they have potentially new operands.
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
|
|
|
|
<< " -> " << Dest->getName() << '\n');
|
2003-04-25 10:50:03 +08:00
|
|
|
|
2017-12-30 23:27:33 +08:00
|
|
|
for (PHINode &PN : Dest->phis())
|
|
|
|
visitPHINode(PN);
|
2003-04-25 10:50:03 +08:00
|
|
|
}
|
2018-07-20 07:02:07 +08:00
|
|
|
return true;
|
2001-06-28 07:38:11 +08:00
|
|
|
}
|
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
// getFeasibleSuccessors - Return a vector of booleans to indicate which
|
|
|
|
// successors are reachable from a given terminator instruction.
|
2018-10-15 18:10:54 +08:00
|
|
|
void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs);
|
2004-11-15 12:44:20 +08:00
|
|
|
|
|
|
|
// OperandChangedState - This method is invoked on all of the users of an
|
2009-11-02 10:33:50 +08:00
|
|
|
// instruction that was just changed state somehow. Based on this
|
2004-11-15 12:44:20 +08:00
|
|
|
// information, we need to update the specified user of this instruction.
|
2009-11-03 11:42:51 +08:00
|
|
|
void OperandChangedState(Instruction *I) {
|
|
|
|
if (BBExecutable.count(I->getParent())) // Inst is executable?
|
|
|
|
visit(*I);
|
2004-11-15 12:44:20 +08:00
|
|
|
}
|
2010-12-01 04:23:21 +08:00
|
|
|
|
2018-08-23 19:04:00 +08:00
|
|
|
// Add U as additional user of V.
|
|
|
|
void addAdditionalUser(Value *V, User *U) {
|
|
|
|
auto Iter = AdditionalUsers.insert({V, {}});
|
|
|
|
Iter.first->second.insert(U);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mark I's users as changed, including AdditionalUsers.
|
|
|
|
void markUsersAsChanged(Value *I) {
|
2020-03-18 04:01:09 +08:00
|
|
|
// Functions include their arguments in the use-list. Changed function
|
|
|
|
// values mean that the result of the function changed. We only need to
|
|
|
|
// update the call sites with the new function result and do not have to
|
|
|
|
// propagate the call arguments.
|
2020-03-18 08:15:50 +08:00
|
|
|
if (isa<Function>(I)) {
|
2020-03-18 04:01:09 +08:00
|
|
|
for (User *U : I->users()) {
|
2020-04-20 15:05:18 +08:00
|
|
|
if (auto *CB = dyn_cast<CallBase>(U))
|
|
|
|
handleCallResult(*CB);
|
2020-03-18 04:01:09 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (User *U : I->users())
|
|
|
|
if (auto *UI = dyn_cast<Instruction>(U))
|
|
|
|
OperandChangedState(UI);
|
|
|
|
}
|
2018-08-23 19:04:00 +08:00
|
|
|
|
|
|
|
auto Iter = AdditionalUsers.find(I);
|
|
|
|
if (Iter != AdditionalUsers.end()) {
|
|
|
|
for (User *U : Iter->second)
|
|
|
|
if (auto *UI = dyn_cast<Instruction>(U))
|
|
|
|
OperandChangedState(UI);
|
|
|
|
}
|
|
|
|
}
|
2020-04-20 15:05:18 +08:00
|
|
|
void handleCallOverdefined(CallBase &CB);
|
|
|
|
void handleCallResult(CallBase &CB);
|
|
|
|
void handleCallArguments(CallBase &CB);
|
2018-08-23 19:04:00 +08:00
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
private:
|
|
|
|
friend class InstVisitor<SCCPSolver>;
|
2001-06-30 07:56:23 +08:00
|
|
|
|
2009-11-02 10:33:50 +08:00
|
|
|
// visit implementations - Something changed in this instruction. Either an
|
2001-06-30 07:56:23 +08:00
|
|
|
// operand made a transition, or the instruction is newly executable. Change
|
|
|
|
// the value type of I to reflect these changes if appropriate.
|
2002-06-26 00:13:24 +08:00
|
|
|
void visitPHINode(PHINode &I);
|
2002-04-18 23:13:15 +08:00
|
|
|
|
|
|
|
// Terminators
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2004-12-10 16:02:06 +08:00
|
|
|
void visitReturnInst(ReturnInst &I);
|
2018-10-15 18:10:54 +08:00
|
|
|
void visitTerminator(Instruction &TI);
|
2002-04-18 23:13:15 +08:00
|
|
|
|
2002-08-15 01:53:45 +08:00
|
|
|
void visitCastInst(CastInst &I);
|
2004-03-12 13:52:44 +08:00
|
|
|
void visitSelectInst(SelectInst &I);
|
2019-06-04 05:53:56 +08:00
|
|
|
void visitUnaryOperator(Instruction &I);
|
2002-06-26 00:13:24 +08:00
|
|
|
void visitBinaryOperator(Instruction &I);
|
2006-12-23 14:05:41 +08:00
|
|
|
void visitCmpInst(CmpInst &I);
|
2008-06-20 09:15:44 +08:00
|
|
|
void visitExtractValueInst(ExtractValueInst &EVI);
|
|
|
|
void visitInsertValueInst(InsertValueInst &IVI);
|
2017-10-21 05:47:29 +08:00
|
|
|
|
[IR] Reformulate LLVM's EH funclet IR
While we have successfully implemented a funclet-oriented EH scheme on
top of LLVM IR, our scheme has some notable deficiencies:
- catchendpad and cleanupendpad are necessary in the current design
but they are difficult to explain to others, even to seasoned LLVM
experts.
- catchendpad and cleanupendpad are optimization barriers. They cannot
be split and force all potentially throwing call-sites to be invokes.
This has a noticeable effect on the quality of our code generation.
- catchpad, while similar in some aspects to invoke, is fairly awkward.
It is unsplittable, starts a funclet, and has control flow to other
funclets.
- The nesting relationship between funclets is currently a property of
control flow edges. Because of this, we are forced to carefully
analyze the flow graph to see if there might potentially exist illegal
nesting among funclets. While we have logic to clone funclets when
they are illegally nested, it would be nicer if we had a
representation which forbade them upfront.
Let's clean this up a bit by doing the following:
- Instead, make catchpad more like cleanuppad and landingpad: no control
flow, just a bunch of simple operands; catchpad would be splittable.
- Introduce catchswitch, a control flow instruction designed to model
the constraints of funclet oriented EH.
- Make funclet scoping explicit by having funclet instructions consume
the token produced by the funclet which contains them.
- Remove catchendpad and cleanupendpad. Their presence can be inferred
implicitly using coloring information.
N.B. The state numbering code for the CLR has been updated but the
veracity of its output cannot be spoken for. An expert should take a
look to make sure the results are reasonable.
Reviewers: rnk, JosephTremoulet, andrew.w.kaylor
Differential Revision: http://reviews.llvm.org/D15139
llvm-svn: 255422
2015-12-12 13:38:55 +08:00
|
|
|
void visitCatchSwitchInst(CatchSwitchInst &CPI) {
|
2017-03-08 09:26:37 +08:00
|
|
|
markOverdefined(&CPI);
|
2018-10-15 18:10:54 +08:00
|
|
|
visitTerminator(CPI);
|
2015-08-04 16:21:40 +08:00
|
|
|
}
|
2002-04-18 23:13:15 +08:00
|
|
|
|
2009-11-02 10:33:50 +08:00
|
|
|
// Instructions that cannot be folded away.
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2009-11-02 13:55:40 +08:00
|
|
|
void visitStoreInst (StoreInst &I);
|
2004-01-12 12:29:41 +08:00
|
|
|
void visitLoadInst (LoadInst &I);
|
2002-08-31 07:39:00 +08:00
|
|
|
void visitGetElementPtrInst(GetElementPtrInst &I);
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2009-10-24 12:23:03 +08:00
|
|
|
void visitCallInst (CallInst &I) {
|
2020-04-20 15:05:18 +08:00
|
|
|
visitCallBase(I);
|
2009-09-19 06:35:49 +08:00
|
|
|
}
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2004-12-10 16:02:06 +08:00
|
|
|
void visitInvokeInst (InvokeInst &II) {
|
2020-04-20 15:05:18 +08:00
|
|
|
visitCallBase(II);
|
2018-10-15 18:10:54 +08:00
|
|
|
visitTerminator(II);
|
2003-08-27 09:08:35 +08:00
|
|
|
}
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2019-02-09 04:48:56 +08:00
|
|
|
void visitCallBrInst (CallBrInst &CBI) {
|
2020-04-20 15:05:18 +08:00
|
|
|
visitCallBase(CBI);
|
2019-02-09 04:48:56 +08:00
|
|
|
visitTerminator(CBI);
|
|
|
|
}
|
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
void visitCallBase (CallBase &CB);
|
2018-10-15 18:10:54 +08:00
|
|
|
void visitResumeInst (ResumeInst &I) { /*returns void*/ }
|
|
|
|
void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ }
|
2016-05-05 07:27:13 +08:00
|
|
|
void visitFenceInst (FenceInst &I) { /*returns void*/ }
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2002-06-26 00:13:24 +08:00
|
|
|
void visitInstruction(Instruction &I) {
|
2017-06-17 04:27:17 +08:00
|
|
|
// All the instructions we don't do any special handling for just
|
|
|
|
// go to overdefined.
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
|
2017-06-17 04:27:17 +08:00
|
|
|
markOverdefined(&I);
|
2002-04-18 23:13:15 +08:00
|
|
|
}
|
2001-06-30 07:56:23 +08:00
|
|
|
};
|
2002-07-24 02:06:35 +08:00
|
|
|
|
2007-07-20 16:56:21 +08:00
|
|
|
} // end anonymous namespace
|
|
|
|
|
2002-05-03 05:44:00 +08:00
|
|
|
// getFeasibleSuccessors - Return a vector of booleans to indicate which
|
|
|
|
// successors are reachable from a given terminator instruction.
|
2018-10-15 18:10:54 +08:00
|
|
|
void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
|
2013-07-14 12:42:23 +08:00
|
|
|
SmallVectorImpl<bool> &Succs) {
|
2003-04-25 10:50:03 +08:00
|
|
|
Succs.resize(TI.getNumSuccessors());
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *BI = dyn_cast<BranchInst>(&TI)) {
|
2002-05-03 05:44:00 +08:00
|
|
|
if (BI->isUnconditional()) {
|
|
|
|
Succs[0] = true;
|
2009-11-02 10:30:06 +08:00
|
|
|
return;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement BCValue = getValueState(BI->getCondition());
|
2020-03-14 00:40:03 +08:00
|
|
|
ConstantInt *CI = getConstantInt(BCValue);
|
2014-04-25 13:29:35 +08:00
|
|
|
if (!CI) {
|
2009-11-02 10:30:06 +08:00
|
|
|
// Overdefined condition variables, and branches on unfoldable constant
|
|
|
|
// conditions, mean the branch could go either way.
|
2020-03-15 00:50:09 +08:00
|
|
|
if (!BCValue.isUnknownOrUndef())
|
2009-11-02 11:21:36 +08:00
|
|
|
Succs[0] = Succs[1] = true;
|
2009-11-02 10:30:06 +08:00
|
|
|
return;
|
2002-05-03 05:44:00 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-02 10:30:06 +08:00
|
|
|
// Constant condition variables mean the branch can only go a single way.
|
2009-11-02 11:21:36 +08:00
|
|
|
Succs[CI->isZero()] = true;
|
2009-10-29 09:21:20 +08:00
|
|
|
return;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2015-08-01 01:58:14 +08:00
|
|
|
// Unwinding instructions successors are always executable.
|
2018-08-26 16:56:42 +08:00
|
|
|
if (TI.isExceptionalTerminator()) {
|
2015-08-01 01:58:14 +08:00
|
|
|
Succs.assign(TI.getNumSuccessors(), true);
|
2009-10-29 09:21:20 +08:00
|
|
|
return;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *SI = dyn_cast<SwitchInst>(&TI)) {
|
SwitchInst refactoring.
The purpose of refactoring is to hide operand roles from SwitchInst user (programmer). If you want to play with operands directly, probably you will need lower level methods than SwitchInst ones (TerminatorInst or may be User). After this patch we can reorganize SwitchInst operands and successors as we want.
What was done:
1. Changed semantics of index inside the getCaseValue method:
getCaseValue(0) means "get first case", not a condition. Use getCondition() if you want to resolve the condition. I propose don't mix SwitchInst case indexing with low level indexing (TI successors indexing, User's operands indexing), since it may be dangerous.
2. By the same reason findCaseValue(ConstantInt*) returns actual number of case value. 0 means first case, not default. If there is no case with given value, ErrorIndex will returned.
3. Added getCaseSuccessor method. I propose to avoid usage of TerminatorInst::getSuccessor if you want to resolve case successor BB. Use getCaseSuccessor instead, since internal SwitchInst organization of operands/successors is hidden and may be changed in any moment.
4. Added resolveSuccessorIndex and resolveCaseIndex. The main purpose of these methods is to see how case successors are really mapped in TerminatorInst.
4.1 "resolveSuccessorIndex" was created if you need to level down from SwitchInst to TerminatorInst. It returns TerminatorInst's successor index for given case successor.
4.2 "resolveCaseIndex" converts low level successors index to case index that curresponds to the given successor.
Note: There are also related compatability fix patches for dragonegg, klee, llvm-gcc-4.0, llvm-gcc-4.2, safecode, clang.
llvm-svn: 149481
2012-02-01 15:49:51 +08:00
|
|
|
if (!SI->getNumCases()) {
|
2011-08-17 05:12:35 +08:00
|
|
|
Succs[0] = true;
|
|
|
|
return;
|
|
|
|
}
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement SCValue = getValueState(SI->getCondition());
|
2020-03-14 00:40:03 +08:00
|
|
|
ConstantInt *CI = getConstantInt(SCValue);
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2016-07-10 08:35:15 +08:00
|
|
|
if (!CI) { // Overdefined or unknown condition?
|
2002-05-03 05:44:00 +08:00
|
|
|
// All destinations are executable!
|
2020-03-15 00:50:09 +08:00
|
|
|
if (!SCValue.isUnknownOrUndef())
|
2009-11-02 11:21:36 +08:00
|
|
|
Succs.assign(TI.getNumSuccessors(), true);
|
|
|
|
return;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2017-04-12 15:27:28 +08:00
|
|
|
Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
|
2009-10-29 09:21:20 +08:00
|
|
|
return;
|
2002-05-03 05:44:00 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2017-04-10 08:33:25 +08:00
|
|
|
// In case of indirect branch and its address is a blockaddress, we mark
|
|
|
|
// the target as executable.
|
|
|
|
if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
|
|
|
|
// Casts are folded by visitCastInst.
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement IBRValue = getValueState(IBR->getAddress());
|
2020-03-14 00:40:03 +08:00
|
|
|
BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(getConstant(IBRValue));
|
2017-04-10 08:33:25 +08:00
|
|
|
if (!Addr) { // Overdefined or unknown condition?
|
|
|
|
// All destinations are executable!
|
2020-03-15 00:50:09 +08:00
|
|
|
if (!IBRValue.isUnknownOrUndef())
|
2017-04-10 08:33:25 +08:00
|
|
|
Succs.assign(TI.getNumSuccessors(), true);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
BasicBlock* T = Addr->getBasicBlock();
|
|
|
|
assert(Addr->getFunction() == T->getParent() &&
|
|
|
|
"Block address of a different function ?");
|
|
|
|
for (unsigned i = 0; i < IBR->getNumSuccessors(); ++i) {
|
|
|
|
// This is the target.
|
|
|
|
if (IBR->getDestination(i) == T) {
|
|
|
|
Succs[i] = true;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we didn't find our destination in the IBR successor list, then we
|
|
|
|
// have undefined behavior. Its ok to assume no successor is executable.
|
2009-10-29 09:21:20 +08:00
|
|
|
return;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2019-02-09 04:48:56 +08:00
|
|
|
// In case of callbr, we pessimistically assume that all successors are
|
|
|
|
// feasible.
|
|
|
|
if (isa<CallBrInst>(&TI)) {
|
|
|
|
Succs.assign(TI.getNumSuccessors(), true);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
|
2009-10-29 09:21:20 +08:00
|
|
|
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
|
2002-05-03 05:44:00 +08:00
|
|
|
}
|
|
|
|
|
2002-05-03 05:18:01 +08:00
|
|
|
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
|
2009-11-02 10:33:50 +08:00
|
|
|
// block to the 'To' basic block is currently feasible.
|
2004-11-15 12:44:20 +08:00
|
|
|
bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
|
2018-07-20 07:02:07 +08:00
|
|
|
// Check if we've called markEdgeExecutable on the edge yet. (We could
|
|
|
|
// be more aggressive and try to consider edges which haven't been marked
|
|
|
|
// yet, but there isn't any need.)
|
|
|
|
return KnownFeasibleEdges.count(Edge(From, To));
|
2002-05-03 05:18:01 +08:00
|
|
|
}
|
2001-06-28 07:38:11 +08:00
|
|
|
|
2009-11-02 10:33:50 +08:00
|
|
|
// visit Implementations - Something changed in this instruction, either an
|
2001-06-28 07:38:11 +08:00
|
|
|
// operand made a transition, or the instruction is newly executable. Change
|
|
|
|
// the value type of I to reflect these changes if appropriate. This method
|
|
|
|
// makes sure to do the following actions:
|
|
|
|
//
|
|
|
|
// 1. If a phi node merges two constants in, and has conflicting value coming
|
|
|
|
// from different branches, or if the PHI node merges in an overdefined
|
|
|
|
// value, then the PHI node becomes overdefined.
|
|
|
|
// 2. If a phi node merges only constants in, and they all agree on value, the
|
|
|
|
// PHI node becomes a constant value equal to that.
|
|
|
|
// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
|
|
|
|
// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
|
|
|
|
// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
|
|
|
|
// 6. If a conditional branch has a value that is constant, make the selected
|
|
|
|
// destination executable
|
|
|
|
// 7. If a conditional branch has a value that is overdefined, make all
|
|
|
|
// successors executable.
|
2004-11-15 12:44:20 +08:00
|
|
|
void SCCPSolver::visitPHINode(PHINode &PN) {
  // PHIs of struct type are not tracked per element; mark the result
  // overdefined.
  // TODO: We could do a lot better than this if code actually uses this.
  if (PN.getType()->isStructTy()) {
    markOverdefined(&PN);
    return;
  }

  // Quick exit: an overdefined PHI cannot change any further.
  if (getValueState(&PN).isOverdefined())
    return;

  // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
  // and slow us down a lot. Just mark them overdefined.
  if (PN.getNumIncomingValues() > 64) {
    markOverdefined(&PN);
    return;
  }

  // Merge the lattice state of every incoming value that arrives over a
  // feasible edge into a local copy of the PHI's current state. If any of them
  // is overdefined, the PHI becomes overdefined as well. If they are all
  // constant and agree, the PHI becomes that constant. If they are constant
  // and disagree, the PHI is a constant range. With no executable operands the
  // PHI remains unknown.
  unsigned FeasibleIncoming = 0;
  ValueLatticeElement Merged = getValueState(&PN);
  BasicBlock *PhiBlock = PN.getParent();
  for (unsigned Idx = 0, NumIncoming = PN.getNumIncomingValues();
       Idx != NumIncoming; ++Idx) {
    if (!isEdgeFeasible(PN.getIncomingBlock(Idx), PhiBlock))
      continue;

    ValueLatticeElement Incoming = getValueState(PN.getIncomingValue(Idx));
    Merged.mergeIn(Incoming);
    ++FeasibleIncoming;

    // Nothing can lower the state again once it is overdefined.
    if (Merged.isOverdefined())
      break;
  }

  // We allow up to 1 range extension per active incoming value and one
  // additional extension. Note that we manually adjust the number of range
  // extensions to match the number of active incoming values. This helps to
  // limit multiple extensions caused by the same incoming value, if other
  // incoming values are equal.
  auto WidenLimit = ValueLatticeElement::MergeOptions().setMaxWidenSteps(
      FeasibleIncoming + 1);
  mergeInValue(&PN, Merged, WidenLimit);

  ValueLatticeElement &Stored = getValueState(&PN);
  Stored.setNumRangeExtensions(
      std::max(FeasibleIncoming, Stored.getNumRangeExtensions()));
}
|
2001-06-28 07:38:11 +08:00
|
|
|
|
2004-12-10 16:02:06 +08:00
|
|
|
// Merge the returned value of I into the tracked return-value state of its
// enclosing function, if that function's return value is being tracked.
void SCCPSolver::visitReturnInst(ReturnInst &I) {
  // 'ret void' carries no value to propagate.
  if (I.getNumOperands() == 0)
    return;

  Function *F = I.getParent()->getParent();
  Value *RetVal = I.getOperand(0);

  // Non-struct return value: merge it into the tracked entry for F, if any.
  if (!TrackedRetVals.empty() && !RetVal->getType()->isStructTy()) {
    auto Tracked = TrackedRetVals.find(F);
    if (Tracked != TrackedRetVals.end()) {
      mergeInValue(Tracked->second, F, getValueState(RetVal));
      return;
    }
  }

  // Handle functions that return multiple values (struct returns): each
  // element is merged separately.
  if (TrackedMultipleRetVals.empty())
    return;

  auto *StructTy = dyn_cast<StructType>(RetVal->getType());
  if (!StructTy || !MRVFunctionsTracked.count(F))
    return;

  for (unsigned Elt = 0, NumElts = StructTy->getNumElements(); Elt != NumElts;
       ++Elt)
    mergeInValue(TrackedMultipleRetVals[std::make_pair(F, Elt)], F,
                 getStructValueState(RetVal, Elt));
}
|
|
|
|
|
2018-10-15 18:10:54 +08:00
|
|
|
// Query which successors of the terminator are feasible and mark the edge
// from the terminator's block to each feasible successor as executable.
void SCCPSolver::visitTerminator(Instruction &TI) {
  SmallVector<bool, 16> Feasible;
  getFeasibleSuccessors(TI, Feasible);

  BasicBlock *Src = TI.getParent();

  // Feasible[N] corresponds to successor N of the terminator.
  unsigned SuccIdx = 0;
  for (bool IsFeasible : Feasible) {
    if (IsFeasible)
      markEdgeExecutable(Src, TI.getSuccessor(SuccIdx));
    ++SuccIdx;
  }
}
|
2001-06-28 07:38:11 +08:00
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
// Propagate lattice information through a cast: fold constant operands,
// cast constant ranges for integer destinations, and otherwise go to
// overdefined once the operand is resolved.
void SCCPSolver::visitCastInst(CastInst &I) {
  // ResolvedUndefsIn might have marked the cast as overdefined. Bail out,
  // even if we would discover a concrete value later.
  if (ValueState[&I].isOverdefined())
    return;

  ValueLatticeElement OperandState = getValueState(I.getOperand(0));

  // Constant operand: fold the cast as we go.
  if (Constant *OperandC = getConstant(OperandState)) {
    Constant *Folded =
        ConstantFoldCastOperand(I.getOpcode(), OperandC, I.getType(), DL);
    // An undef result is not propagated; any other constant is.
    if (!isa<UndefValue>(Folded))
      markConstant(&I, Folded);
    return;
  }

  // Constant-range operand with an integer destination: cast the range.
  if (OperandState.isConstantRange() && I.getDestTy()->isIntegerTy()) {
    auto &ResultState = getValueState(&I);
    Type *DestTy = I.getDestTy();
    ConstantRange CastedRange = OperandState.getConstantRange().castOp(
        I.getOpcode(), DL.getTypeSizeInBits(DestTy));
    mergeInValue(ResultState, &I, ValueLatticeElement::getRange(CastedRange));
    return;
  }

  // Operand is resolved but not something we can reason about.
  if (!OperandState.isUnknownOrUndef())
    markOverdefined(&I);
}
|
|
|
|
|
2009-11-04 07:40:48 +08:00
|
|
|
// Propagate the tracked state of a struct element through an extractvalue.
// Anything outside the single-index, struct-aggregate, non-struct-result case
// is marked overdefined.
void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
  // Give up (in this order) when:
  //  * the result is itself a struct -- structs in structs are not tracked;
  //  * ResolvedUndefsIn already marked the instruction overdefined -- bail
  //    out even if we would discover a concrete value later;
  //  * more than one level of aggregate is being indexed.
  const bool CannotTrack = EVI.getType()->isStructTy() ||
                           ValueState[&EVI].isOverdefined() ||
                           EVI.getNumIndices() != 1;
  if (CannotTrack) {
    markOverdefined(&EVI);
    return;
  }

  Value *Aggregate = EVI.getAggregateOperand();
  if (!Aggregate->getType()->isStructTy()) {
    // Otherwise, must be extracting from an array.
    markOverdefined(&EVI);
    return;
  }

  // Copy the element state before looking up the result's state.
  unsigned EltIdx = *EVI.idx_begin();
  ValueLatticeElement EltState = getStructValueState(Aggregate, EltIdx);
  mergeInValue(getValueState(&EVI), &EVI, EltState);
}
|
|
|
|
|
|
|
|
// Compute the per-element state of the struct produced by an insertvalue:
// untouched elements pass through from the aggregate operand, the inserted
// element takes the state of the inserted value.
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
  auto *STy = dyn_cast<StructType>(IVI.getType());

  // Drive the whole result to overdefined (checked in this order) when:
  //  * the result is not a struct;
  //  * ResolvedUndefsIn already marked the instruction overdefined -- bail
  //    out even if we would discover a concrete value later;
  //  * there is more than one index, which we can't handle.
  if (!STy || isOverdefined(ValueState[&IVI]) || IVI.getNumIndices() != 1) {
    markOverdefined(&IVI);
    return;
  }

  Value *Aggregate = IVI.getAggregateOperand();
  Value *Inserted = IVI.getInsertedValueOperand();
  const unsigned InsertIdx = *IVI.idx_begin();

  // Compute the result based on what we're inserting.
  for (unsigned EltIdx = 0, NumElts = STy->getNumElements(); EltIdx != NumElts;
       ++EltIdx) {
    if (EltIdx == InsertIdx) {
      if (Inserted->getType()->isStructTy()) {
        // We don't track structs in structs.
        markOverdefined(getStructValueState(&IVI, EltIdx), &IVI);
      } else {
        ValueLatticeElement InsertedState = getValueState(Inserted);
        mergeInValue(getStructValueState(&IVI, EltIdx), &IVI, InsertedState);
      }
    } else {
      // Every element other than the inserted one passes through unchanged.
      ValueLatticeElement PassThrough = getStructValueState(Aggregate, EltIdx);
      mergeInValue(getStructValueState(&IVI, EltIdx), &IVI, PassThrough);
    }
  }
}
|
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
// Propagate lattice information through a select: follow the chosen arm when
// the condition is a known integer constant, otherwise merge both arms.
void SCCPSolver::visitSelectInst(SelectInst &I) {
  // Struct-typed selects are simply marked overdefined.
  // TODO: We could do a lot better than this if code actually uses this.
  //
  // ResolvedUndefsIn might also have marked the select as overdefined. Bail
  // out in that case, even if we would discover a concrete value later.
  if (I.getType()->isStructTy() || ValueState[&I].isOverdefined()) {
    markOverdefined(&I);
    return;
  }

  ValueLatticeElement CondState = getValueState(I.getCondition());
  if (CondState.isUnknownOrUndef())
    return;

  // Known condition: the result is exactly the selected operand.
  if (ConstantInt *CondC = getConstantInt(CondState)) {
    Value *Chosen = I.getTrueValue();
    if (CondC->isZero())
      Chosen = I.getFalseValue();
    mergeInValue(&I, getValueState(Chosen));
    return;
  }

  // Otherwise, the condition is overdefined or a constant we can't evaluate.
  // See if we can produce something better than overdefined based on the T/F
  // value.
  ValueLatticeElement TrueState = getValueState(I.getTrueValue());
  ValueLatticeElement FalseState = getValueState(I.getFalseValue());

  // Both operand states are copied above, so no further lookups can
  // invalidate this reference.
  ValueLatticeElement &Result = ValueState[&I];
  bool Changed = Result.mergeIn(TrueState);
  Changed |= Result.mergeIn(FalseState);
  if (Changed)
    pushToWorkListMsg(Result, &I);
}
|
|
|
|
|
2019-06-04 05:53:56 +08:00
|
|
|
// Handle Unary Operators.
|
|
|
|
void SCCPSolver::visitUnaryOperator(Instruction &I) {
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement V0State = getValueState(I.getOperand(0));
|
2019-06-04 05:53:56 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement &IV = ValueState[&I];
|
2020-02-14 07:05:50 +08:00
|
|
|
// ResolvedUndefsIn might mark I as overdefined. Bail out, even if we would
|
|
|
|
// discover a concrete value later.
|
2020-03-14 05:30:28 +08:00
|
|
|
if (isOverdefined(IV))
|
|
|
|
return (void)markOverdefined(&I);
|
2020-02-14 03:53:15 +08:00
|
|
|
|
2020-03-14 00:40:03 +08:00
|
|
|
if (isConstant(V0State)) {
|
|
|
|
Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V0State));
|
2019-06-04 05:53:56 +08:00
|
|
|
|
|
|
|
// op Y -> undef.
|
|
|
|
if (isa<UndefValue>(C))
|
|
|
|
return;
|
|
|
|
return (void)markConstant(IV, &I, C);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If something is undef, wait for it to resolve.
|
2020-03-14 00:40:03 +08:00
|
|
|
if (!isOverdefined(V0State))
|
2019-06-04 05:53:56 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
markOverdefined(&I);
|
|
|
|
}
|
|
|
|
|
2009-11-02 13:55:40 +08:00
|
|
|
// Handle Binary Operators.
|
2004-11-15 12:44:20 +08:00
|
|
|
void SCCPSolver::visitBinaryOperator(Instruction &I) {
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement V1State = getValueState(I.getOperand(0));
|
|
|
|
ValueLatticeElement V2State = getValueState(I.getOperand(1));
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement &IV = ValueState[&I];
|
2020-03-19 17:24:09 +08:00
|
|
|
if (IV.isOverdefined())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// If something is undef, wait for it to resolve.
|
|
|
|
if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef())
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (V1State.isOverdefined() && V2State.isOverdefined())
|
2020-03-14 05:30:28 +08:00
|
|
|
return (void)markOverdefined(&I);
|
2004-01-12 11:57:30 +08:00
|
|
|
|
2020-04-14 18:15:20 +08:00
|
|
|
// If either of the operands is a constant, try to fold it to a constant.
|
2020-03-19 17:24:09 +08:00
|
|
|
// TODO: Use information from notconstant better.
|
2020-04-14 18:15:20 +08:00
|
|
|
if ((V1State.isConstant() || V2State.isConstant())) {
|
|
|
|
Value *V1 = isConstant(V1State) ? getConstant(V1State) : I.getOperand(0);
|
|
|
|
Value *V2 = isConstant(V2State) ? getConstant(V2State) : I.getOperand(1);
|
|
|
|
Value *R = SimplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
|
|
|
|
auto *C = dyn_cast_or_null<Constant>(R);
|
|
|
|
if (C) {
|
|
|
|
// X op Y -> undef.
|
|
|
|
if (isa<UndefValue>(C))
|
|
|
|
return;
|
|
|
|
// Conservatively assume that the result may be based on operands that may
|
|
|
|
// be undef. Note that we use mergeInValue to combine the constant with
|
|
|
|
// the existing lattice value for I, as different constants might be found
|
|
|
|
// after one of the operands go to overdefined, e.g. due to one operand
|
|
|
|
// being a special floating value.
|
|
|
|
ValueLatticeElement NewV;
|
|
|
|
NewV.markConstant(C, /*MayIncludeUndef=*/true);
|
|
|
|
return (void)mergeInValue(&I, NewV);
|
|
|
|
}
|
2016-01-08 05:36:16 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-04-01 00:08:09 +08:00
|
|
|
// Only use ranges for binary operators on integers.
|
|
|
|
if (!I.getType()->isIntegerTy())
|
|
|
|
return markOverdefined(&I);
|
|
|
|
|
2020-04-14 18:15:20 +08:00
|
|
|
// Try to simplify to a constant range.
|
2020-03-19 17:24:09 +08:00
|
|
|
ConstantRange A = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
|
|
|
|
ConstantRange B = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
|
|
|
|
if (V1State.isConstantRange())
|
|
|
|
A = V1State.getConstantRange();
|
|
|
|
if (V2State.isConstantRange())
|
|
|
|
B = V2State.getConstantRange();
|
|
|
|
|
|
|
|
ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B);
|
2020-03-28 23:20:10 +08:00
|
|
|
mergeInValue(&I, ValueLatticeElement::getRange(R));
|
2020-03-19 17:24:09 +08:00
|
|
|
|
|
|
|
// TODO: Currently we do not exploit special values that produce something
|
|
|
|
// better than overdefined with an overdefined operand for vector or floating
|
|
|
|
// point types, like and <4 x i32> overdefined, zeroinitializer.
|
2002-04-18 23:13:15 +08:00
|
|
|
}
|
2002-08-31 07:39:00 +08:00
|
|
|
|
2009-11-02 10:33:50 +08:00
|
|
|
// Handle ICmpInst instruction.
|
2006-12-23 14:05:41 +08:00
|
|
|
void SCCPSolver::visitCmpInst(CmpInst &I) {
|
2018-10-12 17:01:59 +08:00
|
|
|
// Do not cache this lookup, getValueState calls later in the function might
|
|
|
|
// invalidate the reference.
|
2020-03-14 05:30:28 +08:00
|
|
|
if (isOverdefined(ValueState[&I]))
|
|
|
|
return (void)markOverdefined(&I);
|
2006-12-23 14:05:41 +08:00
|
|
|
|
2018-07-20 21:29:12 +08:00
|
|
|
Value *Op1 = I.getOperand(0);
|
|
|
|
Value *Op2 = I.getOperand(1);
|
|
|
|
|
|
|
|
// For parameters, use ParamState which includes constant range info if
|
|
|
|
// available.
|
2020-03-14 00:40:03 +08:00
|
|
|
auto V1State = getValueState(Op1);
|
|
|
|
auto V2State = getValueState(Op2);
|
2018-07-20 21:29:12 +08:00
|
|
|
|
|
|
|
Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State);
|
|
|
|
if (C) {
|
2016-01-08 05:36:16 +08:00
|
|
|
if (isa<UndefValue>(C))
|
|
|
|
return;
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement CV;
|
2018-07-20 21:29:12 +08:00
|
|
|
CV.markConstant(C);
|
|
|
|
mergeInValue(&I, CV);
|
|
|
|
return;
|
2016-01-08 05:36:16 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2016-07-10 08:35:15 +08:00
|
|
|
// If operands are still unknown, wait for it to resolve.
|
2020-03-15 00:50:09 +08:00
|
|
|
if ((V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) &&
|
2020-03-14 00:40:03 +08:00
|
|
|
!isConstant(ValueState[&I]))
|
2009-11-02 13:55:40 +08:00
|
|
|
return;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-02 13:55:40 +08:00
|
|
|
markOverdefined(&I);
|
2006-12-23 14:05:41 +08:00
|
|
|
}
|
|
|
|
|
2009-11-02 10:33:50 +08:00
|
|
|
// Handle getelementptr instructions. If all operands are constants then we
|
2002-08-31 07:39:00 +08:00
|
|
|
// can turn this into a getelementptr ConstantExpr.
|
2004-11-15 12:44:20 +08:00
|
|
|
void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
|
2020-03-14 05:30:28 +08:00
|
|
|
if (isOverdefined(ValueState[&I]))
|
|
|
|
return (void)markOverdefined(&I);
|
2004-01-12 12:29:41 +08:00
|
|
|
|
2007-02-03 04:51:48 +08:00
|
|
|
SmallVector<Constant*, 8> Operands;
|
2002-08-31 07:39:00 +08:00
|
|
|
Operands.reserve(I.getNumOperands());
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement State = getValueState(I.getOperand(i));
|
2020-03-15 00:50:09 +08:00
|
|
|
if (State.isUnknownOrUndef())
|
2009-11-02 10:33:50 +08:00
|
|
|
return; // Operands are not resolved yet.
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-03-14 00:40:03 +08:00
|
|
|
if (isOverdefined(State))
|
2018-07-20 21:29:12 +08:00
|
|
|
return (void)markOverdefined(&I);
|
2009-11-02 11:03:42 +08:00
|
|
|
|
2020-03-14 00:40:03 +08:00
|
|
|
if (Constant *C = getConstant(State)) {
|
|
|
|
Operands.push_back(C);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (void)markOverdefined(&I);
|
2002-08-31 07:39:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
Constant *Ptr = Operands[0];
|
2014-08-27 13:25:25 +08:00
|
|
|
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
|
2016-01-08 05:36:16 +08:00
|
|
|
Constant *C =
|
|
|
|
ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
|
|
|
|
if (isa<UndefValue>(C))
|
|
|
|
return;
|
|
|
|
markConstant(&I, C);
|
2004-01-12 12:29:41 +08:00
|
|
|
}
|
|
|
|
|
2009-11-02 13:55:40 +08:00
|
|
|
// Merge the state of a stored value into the tracked state of the global
// variable being written, and stop tracking the global once it becomes
// overdefined.
void SCCPSolver::visitStoreInst(StoreInst &SI) {
  // If this store is of a struct, ignore it.
  if (SI.getOperand(0)->getType()->isStructTy())
    return;

  if (TrackedGlobals.empty())
    return;

  // Only stores whose pointer operand is directly a tracked global matter.
  auto *GV = dyn_cast<GlobalVariable>(SI.getOperand(1));
  if (!GV)
    return;

  auto Tracked = TrackedGlobals.find(GV);
  if (Tracked == TrackedGlobals.end())
    return;

  // Get the value we are storing into the global, then merge it.
  mergeInValue(Tracked->second, GV, getValueState(SI.getOperand(0)),
               ValueLatticeElement::MergeOptions().setCheckWiden(false));
  if (Tracked->second.isOverdefined())
    TrackedGlobals.erase(Tracked); // No need to keep tracking this!
}
|
|
|
|
|
2020-07-06 03:31:06 +08:00
|
|
|
// Derive a lattice value for an instruction from its metadata: the constant
// range from !range metadata on integer-typed instructions, otherwise
// overdefined.
static ValueLatticeElement getValueFromMetadata(const Instruction *I) {
  MDNode *RangeMD = I->getMetadata(LLVMContext::MD_range);
  // TODO: Also handle MD_nonnull.
  if (!RangeMD || !I->getType()->isIntegerTy())
    return ValueLatticeElement::getOverdefined();

  return ValueLatticeElement::getRange(getConstantRangeFromMetadata(*RangeMD));
}
|
|
|
|
|
2004-01-12 12:29:41 +08:00
|
|
|
// Handle load instructions. If the operand is a constant pointer to a constant
|
|
|
|
// global, we can replace the load with the loaded constant value!
|
2004-11-15 12:44:20 +08:00
|
|
|
void SCCPSolver::visitLoadInst(LoadInst &I) {
|
2020-07-06 03:31:06 +08:00
|
|
|
// If this load is of a struct or the load is volatile, just mark the result
|
|
|
|
// as overdefined.
|
|
|
|
if (I.getType()->isStructTy() || I.isVolatile())
|
2018-07-20 21:29:12 +08:00
|
|
|
return (void)markOverdefined(&I);
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-02-14 07:05:50 +08:00
|
|
|
// ResolvedUndefsIn might mark I as overdefined. Bail out, even if we would
|
|
|
|
// discover a concrete value later.
|
2020-04-26 18:54:35 +08:00
|
|
|
if (ValueState[&I].isOverdefined())
|
2020-03-14 05:30:28 +08:00
|
|
|
return (void)markOverdefined(&I);
|
2020-02-14 07:05:50 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement PtrVal = getValueState(I.getOperand(0));
|
2020-03-15 00:50:09 +08:00
|
|
|
if (PtrVal.isUnknownOrUndef())
|
|
|
|
return; // The pointer is not resolved yet!
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement &IV = ValueState[&I];
|
2004-01-12 12:29:41 +08:00
|
|
|
|
2020-07-06 03:31:06 +08:00
|
|
|
if (isConstant(PtrVal)) {
|
|
|
|
Constant *Ptr = getConstant(PtrVal);
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-07-06 03:31:06 +08:00
|
|
|
// load null is undefined.
|
|
|
|
if (isa<ConstantPointerNull>(Ptr)) {
|
|
|
|
if (NullPointerIsDefined(I.getFunction(), I.getPointerAddressSpace()))
|
|
|
|
return (void)markOverdefined(IV, &I);
|
|
|
|
else
|
2009-11-02 13:55:40 +08:00
|
|
|
return;
|
2020-07-06 03:31:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Transform load (constant global) into the value loaded.
|
|
|
|
if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) {
|
|
|
|
if (!TrackedGlobals.empty()) {
|
|
|
|
// If we are tracking this global, merge in the known value for it.
|
|
|
|
auto It = TrackedGlobals.find(GV);
|
|
|
|
if (It != TrackedGlobals.end()) {
|
|
|
|
mergeInValue(IV, &I, It->second, getMaxWidenStepsOpts());
|
|
|
|
return;
|
|
|
|
}
|
2004-01-12 12:29:41 +08:00
|
|
|
}
|
2004-12-11 13:15:59 +08:00
|
|
|
}
|
2004-01-12 12:29:41 +08:00
|
|
|
|
2020-07-06 03:31:06 +08:00
|
|
|
// Transform load from a constant into a constant if possible.
|
|
|
|
if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) {
|
|
|
|
if (isa<UndefValue>(C))
|
|
|
|
return;
|
|
|
|
return (void)markConstant(IV, &I, C);
|
|
|
|
}
|
2016-01-08 05:36:16 +08:00
|
|
|
}
|
2009-11-02 13:55:40 +08:00
|
|
|
|
2020-07-06 03:31:06 +08:00
|
|
|
// Fall back to metadata.
|
|
|
|
mergeInValue(&I, getValueFromMetadata(&I));
|
2004-01-12 12:29:41 +08:00
|
|
|
}
|
2004-04-14 03:43:54 +08:00
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
// Visit a call site in two steps: first process the call's result, then
// process the arguments passed to the callee.
void SCCPSolver::visitCallBase(CallBase &CB) {
  // Step 1: result of the call.
  handleCallResult(CB);

  // Step 2: actual arguments passed at this call site.
  handleCallArguments(CB);
}
|
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
// Compute the result of a call without tracked return-value information:
// constant fold calls to foldable declarations when all arguments are
// constant, and otherwise fall back to the call's metadata.
void SCCPSolver::handleCallOverdefined(CallBase &CB) {
  Function *Callee = CB.getCalledFunction();

  // Void return and not tracking callee, just bail.
  if (CB.getType()->isVoidTy())
    return;

  // Always mark struct return as overdefined.
  if (CB.getType()->isStructTy()) {
    markOverdefined(&CB);
    return;
  }

  // Otherwise, if we have a single return value case, and if the function is
  // a declaration, maybe we can constant fold it.
  const bool MayFold =
      Callee && Callee->isDeclaration() && canConstantFoldCallTo(&CB, Callee);
  if (MayFold) {
    SmallVector<Constant *, 8> ConstArgs;
    for (const Use &Arg : CB.args()) {
      if (Arg.get()->getType()->isStructTy()) {
        markOverdefined(&CB); // Can't handle struct args.
        return;
      }

      ValueLatticeElement ArgState = getValueState(Arg.get());
      if (ArgState.isUnknownOrUndef())
        return; // Operands are not resolved yet.
      if (isOverdefined(ArgState)) {
        markOverdefined(&CB);
        return;
      }
      assert(isConstant(ArgState) && "Unknown state!");
      ConstArgs.push_back(getConstant(ArgState));
    }

    if (isOverdefined(getValueState(&CB))) {
      markOverdefined(&CB);
      return;
    }

    // If we can constant fold this, mark the result of the call as a
    // constant.
    if (Constant *Folded =
            ConstantFoldCall(&CB, Callee, ConstArgs, &GetTLI(*Callee))) {
      // call -> undef is not propagated.
      if (!isa<UndefValue>(Folded))
        markConstant(&CB, Folded);
      return;
    }
  }

  // Fall back to metadata.
  mergeInValue(&CB, getValueFromMetadata(&CB));
}
|
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
// Propagate the actual arguments of a call site into the formal arguments of
// the callee, when the callee's incoming arguments are being tracked.
void SCCPSolver::handleCallArguments(CallBase &CB) {
  Function *Callee = CB.getCalledFunction();

  // Only functions registered in TrackingIncomingArguments take part.
  if (TrackingIncomingArguments.empty() ||
      !TrackingIncomingArguments.count(Callee))
    return;

  // The callee is reachable through this call: mark its entry block
  // executable.
  MarkBlockExecutable(&Callee->front());

  // Propagate information from this call site into the callee, walking the
  // formal and actual arguments in lockstep.
  auto ActualIt = CB.arg_begin();
  for (Argument &Formal : Callee->args()) {
    Value *Actual = *ActualIt;
    ++ActualIt;

    // If this argument is byval, and if the function is not readonly, there
    // will be an implicit copy formed of the input aggregate.
    if (Formal.hasByValAttr() && !Callee->onlyReadsMemory()) {
      markOverdefined(&Formal);
      continue;
    }

    auto *STy = dyn_cast<StructType>(Formal.getType());
    if (!STy) {
      mergeInValue(&Formal, getValueState(Actual), getMaxWidenStepsOpts());
      continue;
    }

    // Struct arguments are merged element by element.
    for (unsigned EltIdx = 0, NumElts = STy->getNumElements();
         EltIdx != NumElts; ++EltIdx) {
      ValueLatticeElement ActualElt = getStructValueState(Actual, EltIdx);
      mergeInValue(getStructValueState(&Formal, EltIdx), &Formal, ActualElt,
                   getMaxWidenStepsOpts());
    }
  }
}
|
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
void SCCPSolver::handleCallResult(CallBase &CB) {
|
|
|
|
Function *F = CB.getCalledFunction();
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
if (auto *II = dyn_cast<IntrinsicInst>(&CB)) {
|
2018-08-23 19:04:00 +08:00
|
|
|
if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
|
2020-04-20 15:05:18 +08:00
|
|
|
if (ValueState[&CB].isOverdefined())
|
2018-08-23 19:04:00 +08:00
|
|
|
return;
|
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
Value *CopyOf = CB.getOperand(0);
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
ValueLatticeElement CopyOfVal = getValueState(CopyOf);
|
2020-07-07 04:17:16 +08:00
|
|
|
auto *PI = getPredicateInfoFor(&CB);
|
|
|
|
assert(PI && "Missing predicate info for ssa.copy");
|
|
|
|
|
|
|
|
CmpInst *Cmp;
|
|
|
|
bool TrueEdge;
|
|
|
|
if (auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
|
|
|
|
Cmp = dyn_cast<CmpInst>(PBranch->Condition);
|
|
|
|
TrueEdge = PBranch->TrueEdge;
|
|
|
|
} else if (auto *PAssume = dyn_cast<PredicateAssume>(PI)) {
|
|
|
|
Cmp = dyn_cast<CmpInst>(PAssume->Condition);
|
|
|
|
TrueEdge = true;
|
|
|
|
} else {
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
mergeInValue(ValueState[&CB], &CB, CopyOfVal);
|
2018-08-23 19:04:00 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Everything below relies on the condition being a comparison.
|
|
|
|
if (!Cmp) {
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
mergeInValue(ValueState[&CB], &CB, CopyOfVal);
|
2018-08-23 19:04:00 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
Value *RenamedOp = PI->RenamedOp;
|
2018-08-23 19:04:00 +08:00
|
|
|
Value *CmpOp0 = Cmp->getOperand(0);
|
|
|
|
Value *CmpOp1 = Cmp->getOperand(1);
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
// Bail out if neither of the operands matches RenamedOp.
|
|
|
|
if (CmpOp0 != RenamedOp && CmpOp1 != RenamedOp) {
|
|
|
|
mergeInValue(ValueState[&CB], &CB, getValueState(CopyOf));
|
2018-08-23 19:04:00 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-04-07 17:03:46 +08:00
|
|
|
auto Pred = Cmp->getPredicate();
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
if (CmpOp1 == RenamedOp) {
|
2018-08-23 19:04:00 +08:00
|
|
|
std::swap(CmpOp0, CmpOp1);
|
2020-04-07 17:03:46 +08:00
|
|
|
Pred = Cmp->getSwappedPredicate();
|
|
|
|
}
|
2018-08-23 19:04:00 +08:00
|
|
|
|
2020-04-07 17:03:46 +08:00
|
|
|
// Wait until CmpOp1 is resolved.
|
|
|
|
if (getValueState(CmpOp1).isUnknown()) {
|
2020-04-20 15:05:18 +08:00
|
|
|
addAdditionalUser(CmpOp1, &CB);
|
2018-08-23 19:04:00 +08:00
|
|
|
return;
|
|
|
|
}
|
2020-04-07 17:03:46 +08:00
|
|
|
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
// The code below relies on PredicateInfo only inserting copies for the
|
|
|
|
// true branch when the branch condition is an AND and only inserting
|
|
|
|
// copies for the false branch when the branch condition is an OR. This
|
|
|
|
// ensures we can intersect the range from the condition with the range of
|
|
|
|
// CopyOf.
|
2020-07-07 04:17:16 +08:00
|
|
|
if (!TrueEdge)
|
2020-04-07 17:03:46 +08:00
|
|
|
Pred = CmpInst::getInversePredicate(Pred);
|
|
|
|
|
|
|
|
ValueLatticeElement CondVal = getValueState(CmpOp1);
|
2020-04-20 15:05:18 +08:00
|
|
|
ValueLatticeElement &IV = ValueState[&CB];
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) {
|
|
|
|
auto ImposedCR =
|
2020-04-07 17:03:46 +08:00
|
|
|
ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType()));
|
|
|
|
|
|
|
|
// Get the range imposed by the condition.
|
|
|
|
if (CondVal.isConstantRange())
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
ImposedCR = ConstantRange::makeAllowedICmpRegion(
|
2020-04-07 17:03:46 +08:00
|
|
|
Pred, CondVal.getConstantRange());
|
|
|
|
|
|
|
|
// Combine range info for the original value with the new range from the
|
|
|
|
// condition.
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
auto CopyOfCR = CopyOfVal.isConstantRange()
|
|
|
|
? CopyOfVal.getConstantRange()
|
|
|
|
: ConstantRange::getFull(
|
|
|
|
DL.getTypeSizeInBits(CopyOf->getType()));
|
|
|
|
auto NewCR = ImposedCR.intersectWith(CopyOfCR);
|
|
|
|
// If the existing information is != x, do not use the information from
|
|
|
|
// a chained predicate, as the != x information is more likely to be
|
|
|
|
// helpful in practice.
|
|
|
|
if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement())
|
|
|
|
NewCR = CopyOfCR;
|
2020-04-07 17:03:46 +08:00
|
|
|
|
2020-04-20 15:05:18 +08:00
|
|
|
addAdditionalUser(CmpOp1, &CB);
|
2020-04-07 17:03:46 +08:00
|
|
|
// TODO: Actually filp MayIncludeUndef for the created range to false,
|
|
|
|
// once most places in the optimizer respect the branches on
|
|
|
|
// undef/poison are UB rule. The reason why the new range cannot be
|
|
|
|
// undef is as follows below:
|
|
|
|
// The new range is based on a branch condition. That guarantees that
|
|
|
|
// neither of the compare operands can be undef in the branch targets,
|
|
|
|
// unless we have conditions that are always true/false (e.g. icmp ule
|
|
|
|
// i32, %a, i32_max). For the latter overdefined/empty range will be
|
|
|
|
// inferred, but the branch will get folded accordingly anyways.
|
|
|
|
mergeInValue(
|
2020-04-20 15:05:18 +08:00
|
|
|
IV, &CB,
|
2020-04-07 17:03:46 +08:00
|
|
|
ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true));
|
|
|
|
return;
|
|
|
|
} else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) {
|
|
|
|
// For non-integer values or integer constant expressions, only
|
|
|
|
// propagate equal constants.
|
2020-04-20 15:05:18 +08:00
|
|
|
addAdditionalUser(CmpOp1, &CB);
|
|
|
|
mergeInValue(IV, &CB, CondVal);
|
2018-08-23 19:04:00 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
[SCCP] Use conditional info with AND/OR branch conditions.
Currently SCCP does not combine the information of conditions joined by
AND in the true branch or OR in the false branch.
For branches on AND, 2 copies will be inserted for the true branch, with
one being the operand of the other as in the code below. We can combine
the information using intersection. Note that for the OR case, the
copies are inserted in the false branch, where using intersection is
safe as well.
define void @foo(i32 %a) {
entry:
%lt = icmp ult i32 %a, 100
%gt = icmp ugt i32 %a, 20
%and = and i1 %lt, %gt
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %lt = icmp ult i32 %a, 100 Edge: [label %entry,label %true] }
%a.0 = call i32 @llvm.ssa.copy.140247425954880(i32 %a)
; Has predicate info
; branch predicate info { TrueEdge: 1 Comparison: %gt = icmp ugt i32 %a, 20 Edge: [label %entry,label %false] }
%a.1 = call i32 @llvm.ssa.copy.140247425954880(i32 %a.0)
br i1 %and, label %true, label %false
true: ; preds = %entry
call void @use(i32 %a.1)
%true.1 = icmp ne i32 %a.1, 20
call void @use.i1(i1 %true.1)
ret void
false: ; preds = %entry
call void @use(i32 %a.1)
ret void
}
Reviewers: efriedma, davide, mssimpso, nikic
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D77808
2020-07-09 19:59:24 +08:00
|
|
|
return (void)mergeInValue(IV, &CB, CopyOfVal);
|
2018-08-23 19:04:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-23 13:38:20 +08:00
|
|
|
// The common case is that we aren't tracking the callee, either because we
|
|
|
|
// are not doing interprocedural analysis or the callee is indirect, or is
|
|
|
|
// external. Handle these cases first.
|
2020-03-18 04:01:09 +08:00
|
|
|
if (!F || F->isDeclaration())
|
2020-04-20 15:05:18 +08:00
|
|
|
return handleCallOverdefined(CB);
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2008-04-23 13:38:20 +08:00
|
|
|
// If this is a single/zero retval case, see if we're tracking the function.
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
|
2009-11-04 07:40:48 +08:00
|
|
|
if (!MRVFunctionsTracked.count(F))
|
2020-04-20 15:05:18 +08:00
|
|
|
return handleCallOverdefined(CB); // Not tracking this callee.
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-04 07:40:48 +08:00
|
|
|
// If we are tracking this callee, propagate the result of the function
|
|
|
|
// into this call site.
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
|
2020-04-20 15:05:18 +08:00
|
|
|
mergeInValue(getStructValueState(&CB, i), &CB,
|
[SCCP] Switch to widen at PHIs, stores and call edges.
Currently SCCP does not widen PHIs, stores or along call edges
(arguments/return values), but on operations that directly extend ranges
(like binary operators).
This means PHIs, stores and call edges are not pessimized by widening
currently, while binary operators are. The main reason for widening
operators initially was that opting-out for certain operations was
more straight-forward in the initial implementation (and it did not
matter too much, as range support initially was only implemented for a
very limited set of operations.
During the discussion in D78391, it was suggested to consider flipping
widening to PHIs, stores and along call edges. After adding support for
tracking the number of range extensions in ValueLattice, limiting the
number of range extensions per value is straight forward.
This patch introduces a MaxWidenSteps option to the MergeOptions,
limiting the number of range extensions per value. For PHIs, it seems
natural allow an extension for each (active) incoming value plus 1. For
the other cases, a arbitrary limit of 10 has been chosen initially. It would
potentially make sense to set it depending on the users of a
function/global, but that still needs investigating. This potentially
leads to more state-changes and longer compile-times.
The results look quite promising (MultiSource, SPEC):
Same hash: 179 (filtered out)
Remaining: 58
Metric: sccp.IPNumInstRemoved
Program base widen-phi diff
test-suite...ks/Prolangs-C/agrep/agrep.test 58.00 82.00 41.4%
test-suite...marks/SciMark2-C/scimark2.test 32.00 43.00 34.4%
test-suite...rks/FreeBench/mason/mason.test 6.00 8.00 33.3%
test-suite...langs-C/football/football.test 104.00 128.00 23.1%
test-suite...cations/hexxagon/hexxagon.test 36.00 42.00 16.7%
test-suite...CFP2000/177.mesa/177.mesa.test 214.00 249.00 16.4%
test-suite...ngs-C/assembler/assembler.test 14.00 16.00 14.3%
test-suite...arks/VersaBench/dbms/dbms.test 10.00 11.00 10.0%
test-suite...oxyApps-C++/miniFE/miniFE.test 43.00 47.00 9.3%
test-suite...ications/JM/ldecod/ldecod.test 179.00 195.00 8.9%
test-suite...CFP2006/433.milc/433.milc.test 249.00 265.00 6.4%
test-suite.../CINT2000/175.vpr/175.vpr.test 98.00 104.00 6.1%
test-suite...peg2/mpeg2dec/mpeg2decode.test 70.00 74.00 5.7%
test-suite...CFP2000/188.ammp/188.ammp.test 71.00 75.00 5.6%
test-suite...ce/Benchmarks/PAQ8p/paq8p.test 111.00 117.00 5.4%
test-suite...ce/Applications/Burg/burg.test 41.00 43.00 4.9%
test-suite...000/197.parser/197.parser.test 66.00 69.00 4.5%
test-suite...tions/lambda-0.1.3/lambda.test 23.00 24.00 4.3%
test-suite...urce/Applications/lua/lua.test 301.00 313.00 4.0%
test-suite...TimberWolfMC/timberwolfmc.test 76.00 79.00 3.9%
test-suite...lications/ClamAV/clamscan.test 991.00 1030.00 3.9%
test-suite...plications/d/make_dparser.test 53.00 55.00 3.8%
test-suite...fice-ispell/office-ispell.test 83.00 86.00 3.6%
test-suite...lications/obsequi/Obsequi.test 28.00 29.00 3.6%
test-suite.../Prolangs-C/bison/mybison.test 56.00 58.00 3.6%
test-suite.../CINT2000/254.gap/254.gap.test 170.00 176.00 3.5%
test-suite.../Applications/lemon/lemon.test 30.00 31.00 3.3%
test-suite.../CINT2000/176.gcc/176.gcc.test 1202.00 1240.00 3.2%
test-suite...pplications/treecc/treecc.test 79.00 81.00 2.5%
test-suite...chmarks/MallocBench/gs/gs.test 357.00 366.00 2.5%
test-suite...eeBench/analyzer/analyzer.test 103.00 105.00 1.9%
test-suite...T2006/445.gobmk/445.gobmk.test 1697.00 1724.00 1.6%
test-suite...006/453.povray/453.povray.test 1812.00 1839.00 1.5%
test-suite.../Benchmarks/Bullet/bullet.test 337.00 342.00 1.5%
test-suite.../CINT2000/252.eon/252.eon.test 426.00 432.00 1.4%
test-suite...T2000/300.twolf/300.twolf.test 214.00 217.00 1.4%
test-suite...pplications/oggenc/oggenc.test 244.00 247.00 1.2%
test-suite.../CINT2006/403.gcc/403.gcc.test 4008.00 4055.00 1.2%
test-suite...T2006/456.hmmer/456.hmmer.test 175.00 177.00 1.1%
test-suite...nal/skidmarks10/skidmarks.test 430.00 434.00 0.9%
test-suite.../Applications/sgefa/sgefa.test 115.00 116.00 0.9%
test-suite...006/447.dealII/447.dealII.test 1082.00 1091.00 0.8%
test-suite...6/482.sphinx3/482.sphinx3.test 141.00 142.00 0.7%
test-suite...ocBench/espresso/espresso.test 152.00 153.00 0.7%
test-suite...3.xalancbmk/483.xalancbmk.test 4003.00 4025.00 0.5%
test-suite...lications/sqlite3/sqlite3.test 548.00 551.00 0.5%
test-suite...marks/7zip/7zip-benchmark.test 5522.00 5551.00 0.5%
test-suite...nsumer-lame/consumer-lame.test 208.00 209.00 0.5%
test-suite...:: External/Povray/povray.test 1556.00 1563.00 0.4%
test-suite...000/186.crafty/186.crafty.test 298.00 299.00 0.3%
test-suite.../Applications/SPASS/SPASS.test 2019.00 2025.00 0.3%
test-suite...ications/JM/lencod/lencod.test 8427.00 8449.00 0.3%
test-suite...6/464.h264ref/464.h264ref.test 6797.00 6813.00 0.2%
test-suite...6/471.omnetpp/471.omnetpp.test 431.00 430.00 -0.2%
test-suite...006/450.soplex/450.soplex.test 446.00 447.00 0.2%
test-suite...0.perlbench/400.perlbench.test 1729.00 1727.00 -0.1%
test-suite...000/255.vortex/255.vortex.test 3815.00 3819.00 0.1%
Reviewers: efriedma, nikic, davide
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D79036
2020-05-29 16:29:39 +08:00
|
|
|
TrackedMultipleRetVals[std::make_pair(F, i)],
|
|
|
|
getMaxWidenStepsOpts());
|
2008-06-20 09:15:44 +08:00
|
|
|
} else {
|
2020-03-28 23:20:10 +08:00
|
|
|
auto TFRVI = TrackedRetVals.find(F);
|
2009-11-04 07:40:48 +08:00
|
|
|
if (TFRVI == TrackedRetVals.end())
|
2020-04-20 15:05:18 +08:00
|
|
|
return handleCallOverdefined(CB); // Not tracking this callee.
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-04 07:40:48 +08:00
|
|
|
// If so, propagate the return value of the callee into this call result.
|
[SCCP] Switch to widen at PHIs, stores and call edges.
Currently SCCP does not widen PHIs, stores or along call edges
(arguments/return values), but on operations that directly extend ranges
(like binary operators).
This means PHIs, stores and call edges are not pessimized by widening
currently, while binary operators are. The main reason for widening
operators initially was that opting-out for certain operations was
more straight-forward in the initial implementation (and it did not
matter too much, as range support initially was only implemented for a
very limited set of operations.
During the discussion in D78391, it was suggested to consider flipping
widening to PHIs, stores and along call edges. After adding support for
tracking the number of range extensions in ValueLattice, limiting the
number of range extensions per value is straight forward.
This patch introduces a MaxWidenSteps option to the MergeOptions,
limiting the number of range extensions per value. For PHIs, it seems
natural allow an extension for each (active) incoming value plus 1. For
the other cases, a arbitrary limit of 10 has been chosen initially. It would
potentially make sense to set it depending on the users of a
function/global, but that still needs investigating. This potentially
leads to more state-changes and longer compile-times.
The results look quite promising (MultiSource, SPEC):
Same hash: 179 (filtered out)
Remaining: 58
Metric: sccp.IPNumInstRemoved
Program base widen-phi diff
test-suite...ks/Prolangs-C/agrep/agrep.test 58.00 82.00 41.4%
test-suite...marks/SciMark2-C/scimark2.test 32.00 43.00 34.4%
test-suite...rks/FreeBench/mason/mason.test 6.00 8.00 33.3%
test-suite...langs-C/football/football.test 104.00 128.00 23.1%
test-suite...cations/hexxagon/hexxagon.test 36.00 42.00 16.7%
test-suite...CFP2000/177.mesa/177.mesa.test 214.00 249.00 16.4%
test-suite...ngs-C/assembler/assembler.test 14.00 16.00 14.3%
test-suite...arks/VersaBench/dbms/dbms.test 10.00 11.00 10.0%
test-suite...oxyApps-C++/miniFE/miniFE.test 43.00 47.00 9.3%
test-suite...ications/JM/ldecod/ldecod.test 179.00 195.00 8.9%
test-suite...CFP2006/433.milc/433.milc.test 249.00 265.00 6.4%
test-suite.../CINT2000/175.vpr/175.vpr.test 98.00 104.00 6.1%
test-suite...peg2/mpeg2dec/mpeg2decode.test 70.00 74.00 5.7%
test-suite...CFP2000/188.ammp/188.ammp.test 71.00 75.00 5.6%
test-suite...ce/Benchmarks/PAQ8p/paq8p.test 111.00 117.00 5.4%
test-suite...ce/Applications/Burg/burg.test 41.00 43.00 4.9%
test-suite...000/197.parser/197.parser.test 66.00 69.00 4.5%
test-suite...tions/lambda-0.1.3/lambda.test 23.00 24.00 4.3%
test-suite...urce/Applications/lua/lua.test 301.00 313.00 4.0%
test-suite...TimberWolfMC/timberwolfmc.test 76.00 79.00 3.9%
test-suite...lications/ClamAV/clamscan.test 991.00 1030.00 3.9%
test-suite...plications/d/make_dparser.test 53.00 55.00 3.8%
test-suite...fice-ispell/office-ispell.test 83.00 86.00 3.6%
test-suite...lications/obsequi/Obsequi.test 28.00 29.00 3.6%
test-suite.../Prolangs-C/bison/mybison.test 56.00 58.00 3.6%
test-suite.../CINT2000/254.gap/254.gap.test 170.00 176.00 3.5%
test-suite.../Applications/lemon/lemon.test 30.00 31.00 3.3%
test-suite.../CINT2000/176.gcc/176.gcc.test 1202.00 1240.00 3.2%
test-suite...pplications/treecc/treecc.test 79.00 81.00 2.5%
test-suite...chmarks/MallocBench/gs/gs.test 357.00 366.00 2.5%
test-suite...eeBench/analyzer/analyzer.test 103.00 105.00 1.9%
test-suite...T2006/445.gobmk/445.gobmk.test 1697.00 1724.00 1.6%
test-suite...006/453.povray/453.povray.test 1812.00 1839.00 1.5%
test-suite.../Benchmarks/Bullet/bullet.test 337.00 342.00 1.5%
test-suite.../CINT2000/252.eon/252.eon.test 426.00 432.00 1.4%
test-suite...T2000/300.twolf/300.twolf.test 214.00 217.00 1.4%
test-suite...pplications/oggenc/oggenc.test 244.00 247.00 1.2%
test-suite.../CINT2006/403.gcc/403.gcc.test 4008.00 4055.00 1.2%
test-suite...T2006/456.hmmer/456.hmmer.test 175.00 177.00 1.1%
test-suite...nal/skidmarks10/skidmarks.test 430.00 434.00 0.9%
test-suite.../Applications/sgefa/sgefa.test 115.00 116.00 0.9%
test-suite...006/447.dealII/447.dealII.test 1082.00 1091.00 0.8%
test-suite...6/482.sphinx3/482.sphinx3.test 141.00 142.00 0.7%
test-suite...ocBench/espresso/espresso.test 152.00 153.00 0.7%
test-suite...3.xalancbmk/483.xalancbmk.test 4003.00 4025.00 0.5%
test-suite...lications/sqlite3/sqlite3.test 548.00 551.00 0.5%
test-suite...marks/7zip/7zip-benchmark.test 5522.00 5551.00 0.5%
test-suite...nsumer-lame/consumer-lame.test 208.00 209.00 0.5%
test-suite...:: External/Povray/povray.test 1556.00 1563.00 0.4%
test-suite...000/186.crafty/186.crafty.test 298.00 299.00 0.3%
test-suite.../Applications/SPASS/SPASS.test 2019.00 2025.00 0.3%
test-suite...ications/JM/lencod/lencod.test 8427.00 8449.00 0.3%
test-suite...6/464.h264ref/464.h264ref.test 6797.00 6813.00 0.2%
test-suite...6/471.omnetpp/471.omnetpp.test 431.00 430.00 -0.2%
test-suite...006/450.soplex/450.soplex.test 446.00 447.00 0.2%
test-suite...0.perlbench/400.perlbench.test 1729.00 1727.00 -0.1%
test-suite...000/255.vortex/255.vortex.test 3815.00 3819.00 0.1%
Reviewers: efriedma, nikic, davide
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D79036
2020-05-29 16:29:39 +08:00
|
|
|
mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts());
|
2004-04-14 03:43:54 +08:00
|
|
|
}
|
|
|
|
}
|
2004-11-15 12:44:20 +08:00
|
|
|
|
|
|
|
void SCCPSolver::Solve() {
|
|
|
|
// Process the work lists until they are empty!
|
2005-04-22 07:48:37 +08:00
|
|
|
while (!BBWorkList.empty() || !InstWorkList.empty() ||
|
2005-04-24 05:38:35 +08:00
|
|
|
!OverdefinedInstWorkList.empty()) {
|
2009-11-02 13:55:40 +08:00
|
|
|
// Process the overdefined instruction's work list first, which drives other
|
|
|
|
// things to overdefined more quickly.
|
2004-11-15 12:44:20 +08:00
|
|
|
while (!OverdefinedInstWorkList.empty()) {
|
2009-11-02 13:55:40 +08:00
|
|
|
Value *I = OverdefinedInstWorkList.pop_back_val();
|
2004-11-15 12:44:20 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
// "I" got into the work list because it either made the transition from
|
2013-02-21 04:15:55 +08:00
|
|
|
// bottom to constant, or to overdefined.
|
2004-11-15 12:44:20 +08:00
|
|
|
//
|
|
|
|
// Anything on this worklist that is overdefined need not be visited
|
|
|
|
// since all of its users will have already been marked as overdefined
|
2009-11-02 10:33:50 +08:00
|
|
|
// Update all of the users of this instruction's value.
|
2004-11-15 12:44:20 +08:00
|
|
|
//
|
2018-08-23 19:04:00 +08:00
|
|
|
markUsersAsChanged(I);
|
2004-11-15 12:44:20 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-02 10:33:50 +08:00
|
|
|
// Process the instruction work list.
|
2004-11-15 12:44:20 +08:00
|
|
|
while (!InstWorkList.empty()) {
|
2009-11-02 13:55:40 +08:00
|
|
|
Value *I = InstWorkList.pop_back_val();
|
2004-11-15 12:44:20 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2009-11-02 13:55:40 +08:00
|
|
|
// "I" got into the work list because it made the transition from undef to
|
|
|
|
// constant.
|
2004-11-15 12:44:20 +08:00
|
|
|
//
|
|
|
|
// Anything on this worklist that is overdefined need not be visited
|
|
|
|
// since all of its users will have already been marked as overdefined.
|
2009-11-02 10:33:50 +08:00
|
|
|
// Update all of the users of this instruction's value.
|
2004-11-15 12:44:20 +08:00
|
|
|
//
|
2020-03-14 05:30:28 +08:00
|
|
|
if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
|
2018-08-23 19:04:00 +08:00
|
|
|
markUsersAsChanged(I);
|
2004-11-15 12:44:20 +08:00
|
|
|
}
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2009-11-02 10:33:50 +08:00
|
|
|
// Process the basic block work list.
|
2004-11-15 12:44:20 +08:00
|
|
|
while (!BBWorkList.empty()) {
|
|
|
|
BasicBlock *BB = BBWorkList.back();
|
|
|
|
BBWorkList.pop_back();
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
// Notify all instructions in this basic block that they are newly
|
|
|
|
// executable.
|
|
|
|
visit(BB);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-12-20 14:21:33 +08:00
|
|
|
/// ResolvedUndefsIn - While solving the dataflow for a function, we assume
|
2004-12-11 04:41:50 +08:00
|
|
|
/// that branches on undef values cannot reach any of their successors.
|
|
|
|
/// However, this is not a safe assumption. After we solve dataflow, this
|
|
|
|
/// method should be use to handle this. If this returns true, the solver
|
|
|
|
/// should be rerun.
|
2006-10-22 13:59:17 +08:00
|
|
|
///
|
|
|
|
/// This method handles this by finding an unresolved branch and marking it one
|
|
|
|
/// of the edges from the block as being feasible, even though the condition
|
|
|
|
/// doesn't say it would otherwise be. This allows SCCP to find the rest of the
|
|
|
|
/// CFG and only slightly pessimizes the analysis results (by marking one,
|
2006-12-20 14:21:33 +08:00
|
|
|
/// potentially infeasible, edge feasible). This cannot usefully modify the
|
2006-10-22 13:59:17 +08:00
|
|
|
/// constraints on the condition of the branch, as that would impact other users
|
|
|
|
/// of the value.
|
2006-12-20 14:21:33 +08:00
|
|
|
///
|
2020-02-14 07:05:50 +08:00
|
|
|
/// This scan also checks for values that use undefs. It conservatively marks
|
|
|
|
/// them as overdefined.
|
2006-12-20 14:21:33 +08:00
|
|
|
bool SCCPSolver::ResolvedUndefsIn(Function &F) {
|
2016-06-26 20:28:59 +08:00
|
|
|
for (BasicBlock &BB : F) {
|
|
|
|
if (!BBExecutable.count(&BB))
|
2006-10-22 13:59:17 +08:00
|
|
|
continue;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2016-06-26 20:28:59 +08:00
|
|
|
for (Instruction &I : BB) {
|
2006-12-20 14:21:33 +08:00
|
|
|
// Look for instructions which produce undef values.
|
2015-10-14 03:26:58 +08:00
|
|
|
if (I.getType()->isVoidTy()) continue;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *STy = dyn_cast<StructType>(I.getType())) {
|
2011-09-21 07:28:51 +08:00
|
|
|
// Only a few things that can be structs matter for undef.
|
|
|
|
|
|
|
|
// Tracked calls must never be marked overdefined in ResolvedUndefsIn.
|
2020-04-20 15:05:18 +08:00
|
|
|
if (auto *CB = dyn_cast<CallBase>(&I))
|
|
|
|
if (Function *F = CB->getCalledFunction())
|
2020-02-14 03:53:15 +08:00
|
|
|
if (MRVFunctionsTracked.count(F))
|
2011-09-21 07:28:51 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
// extractvalue and insertvalue don't need to be marked; they are
|
2012-01-19 05:16:33 +08:00
|
|
|
// tracked as precisely as their operands.
|
2011-09-21 07:28:51 +08:00
|
|
|
if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
|
|
|
|
continue;
|
|
|
|
// Send the results of everything else to overdefined. We could be
|
|
|
|
// more precise than this but it isn't worth bothering.
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement &LV = getStructValueState(&I, i);
|
2020-03-15 00:50:09 +08:00
|
|
|
if (LV.isUnknownOrUndef())
|
2015-10-14 03:26:58 +08:00
|
|
|
markOverdefined(LV, &I);
|
2009-11-04 07:40:48 +08:00
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
2011-08-17 06:06:31 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
ValueLatticeElement &LV = getValueState(&I);
|
2020-03-15 00:50:09 +08:00
|
|
|
if (!LV.isUnknownOrUndef())
|
2019-07-31 20:57:04 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
// There are two reasons a call can have an undef result
|
|
|
|
// 1. It could be tracked.
|
|
|
|
// 2. It could be constant-foldable.
|
|
|
|
// Because of the way we solve return values, tracked calls must
|
|
|
|
// never be marked overdefined in ResolvedUndefsIn.
|
2020-04-20 15:05:18 +08:00
|
|
|
if (auto *CB = dyn_cast<CallBase>(&I))
|
|
|
|
if (Function *F = CB->getCalledFunction())
|
2020-02-14 03:53:15 +08:00
|
|
|
if (TrackedRetVals.count(F))
|
2019-07-31 20:57:04 +08:00
|
|
|
continue;
|
|
|
|
|
2020-02-21 02:46:29 +08:00
|
|
|
if (isa<LoadInst>(I)) {
|
|
|
|
// A load here means one of two things: a load of undef from a global,
|
|
|
|
// a load from an unknown pointer. Either way, having it return undef
|
|
|
|
// is okay.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-02-14 07:05:50 +08:00
|
|
|
markOverdefined(&I);
|
|
|
|
return true;
|
2006-12-20 14:21:33 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2010-04-06 06:14:48 +08:00
|
|
|
// Check to see if we have a branch or switch on an undefined value. If so
|
|
|
|
// we force the branch to go one way or the other to make the successor
|
|
|
|
// values live. It doesn't really matter which way we force it.
|
2018-10-15 18:04:59 +08:00
|
|
|
Instruction *TI = BB.getTerminator();
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *BI = dyn_cast<BranchInst>(TI)) {
|
2006-10-22 13:59:17 +08:00
|
|
|
if (!BI->isConditional()) continue;
|
2020-03-15 00:50:09 +08:00
|
|
|
if (!getValueState(BI->getCondition()).isUnknownOrUndef())
|
2006-10-22 13:59:17 +08:00
|
|
|
continue;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2010-04-06 06:14:48 +08:00
|
|
|
// If the input to SCCP is actually branch on undef, fix the undef to
|
|
|
|
// false.
|
|
|
|
if (isa<UndefValue>(BI->getCondition())) {
|
|
|
|
BI->setCondition(ConstantInt::getFalse(BI->getContext()));
|
2016-06-26 20:28:59 +08:00
|
|
|
markEdgeExecutable(&BB, TI->getSuccessor(1));
|
2010-04-06 06:14:48 +08:00
|
|
|
return true;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2010-04-06 06:14:48 +08:00
|
|
|
// Otherwise, it is a branch on a symbolic value which is currently
|
2018-07-20 07:02:07 +08:00
|
|
|
// considered to be undef. Make sure some edge is executable, so a
|
|
|
|
// branch on "undef" always flows somewhere.
|
|
|
|
// FIXME: Distinguish between dead code and an LLVM "undef" value.
|
|
|
|
BasicBlock *DefaultSuccessor = TI->getSuccessor(1);
|
|
|
|
if (markEdgeExecutable(&BB, DefaultSuccessor))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
continue;
|
2010-04-06 06:14:48 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2017-04-10 08:33:25 +08:00
|
|
|
if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
|
|
|
|
// Indirect branch with no successor ?. Its ok to assume it branches
|
|
|
|
// to no target.
|
|
|
|
if (IBR->getNumSuccessors() < 1)
|
|
|
|
continue;
|
|
|
|
|
2020-03-15 00:50:09 +08:00
|
|
|
if (!getValueState(IBR->getAddress()).isUnknownOrUndef())
|
2017-04-10 08:33:25 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
// If the input to SCCP is actually branch on undef, fix the undef to
|
|
|
|
// the first successor of the indirect branch.
|
|
|
|
if (isa<UndefValue>(IBR->getAddress())) {
|
|
|
|
IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
|
|
|
|
markEdgeExecutable(&BB, IBR->getSuccessor(0));
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, it is a branch on a symbolic value which is currently
|
2018-07-20 07:02:07 +08:00
|
|
|
// considered to be undef. Make sure some edge is executable, so a
|
|
|
|
// branch on "undef" always flows somewhere.
|
|
|
|
// FIXME: IndirectBr on "undef" doesn't actually need to go anywhere:
|
|
|
|
// we can assume the branch has undefined behavior instead.
|
|
|
|
BasicBlock *DefaultSuccessor = IBR->getSuccessor(0);
|
|
|
|
if (markEdgeExecutable(&BB, DefaultSuccessor))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
continue;
|
2017-04-10 08:33:25 +08:00
|
|
|
}
|
|
|
|
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *SI = dyn_cast<SwitchInst>(TI)) {
|
2020-03-15 00:50:09 +08:00
|
|
|
if (!SI->getNumCases() ||
|
|
|
|
!getValueState(SI->getCondition()).isUnknownOrUndef())
|
2006-10-22 13:59:17 +08:00
|
|
|
continue;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2010-04-06 06:14:48 +08:00
|
|
|
// If the input to SCCP is actually switch on undef, fix the undef to
|
|
|
|
// the first constant.
|
|
|
|
if (isa<UndefValue>(SI->getCondition())) {
|
2017-04-12 15:27:28 +08:00
|
|
|
SI->setCondition(SI->case_begin()->getCaseValue());
|
|
|
|
markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
|
2010-04-06 06:14:48 +08:00
|
|
|
return true;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2018-07-20 07:02:07 +08:00
|
|
|
// Otherwise, it is a branch on a symbolic value which is currently
|
|
|
|
// considered to be undef. Make sure some edge is executable, so a
|
|
|
|
// branch on "undef" always flows somewhere.
|
|
|
|
// FIXME: Distinguish between dead code and an LLVM "undef" value.
|
|
|
|
BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor();
|
|
|
|
if (markEdgeExecutable(&BB, DefaultSuccessor))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
continue;
|
2008-01-28 08:32:30 +08:00
|
|
|
}
|
2006-10-22 13:59:17 +08:00
|
|
|
}
|
2004-12-11 14:05:53 +08:00
|
|
|
|
2006-10-22 13:59:17 +08:00
|
|
|
return false;
|
2004-12-11 04:41:50 +08:00
|
|
|
}
|
|
|
|
|
2016-07-15 04:25:54 +08:00
|
|
|
/// Try to replace every use of \p V with the constant the solver computed for
/// it.
///
/// For a struct-typed value a ConstantStruct is assembled element by element;
/// for any other value a single constant is used. Lattice values that are
/// neither overdefined nor constant are materialized as undef.
///
/// \returns true if the uses of \p V were replaced. Returns false, leaving
/// \p V untouched, when the value (or any struct element) is overdefined, or
/// when \p V is a musttail call that is not safe to remove; in the latter case
/// the callee, if known, is registered with the solver through
/// AddMustTailCallee.
static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
  Constant *Const = nullptr;
  if (V->getType()->isStructTy()) {
    std::vector<ValueLatticeElement> IVs = Solver.getStructLatticeValueFor(V);
    if (any_of(IVs,
               [](const ValueLatticeElement &LV) { return isOverdefined(LV); }))
      return false;

    auto *ST = cast<StructType>(V->getType());
    std::vector<Constant *> ConstVals;
    ConstVals.reserve(ST->getNumElements());
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      // Bind by reference under a distinct name: this avoids copying the
      // lattice element and no longer shadows the parameter V.
      const ValueLatticeElement &EltIV = IVs[i];
      ConstVals.push_back(isConstant(EltIV)
                              ? Solver.getConstant(EltIV)
                              : UndefValue::get(ST->getElementType(i)));
    }
    Const = ConstantStruct::get(ST, ConstVals);
  } else {
    const ValueLatticeElement &IV = Solver.getLatticeValueFor(V);
    if (isOverdefined(IV))
      return false;

    Const =
        isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
  }
  assert(Const && "Constant is nullptr here!");

  // Replacing `musttail` instructions with constant breaks `musttail` invariant
  // unless the call itself can be removed.
  CallInst *CI = dyn_cast<CallInst>(V);
  if (CI && CI->isMustTailCall() && !CI->isSafeToRemove()) {
    // Don't zap returns of the callee.
    if (Function *Callee = CI->getCalledFunction())
      Solver.AddMustTailCallee(Callee);

    LLVM_DEBUG(dbgs() << " Can\'t treat the result of musttail call : " << *CI
                      << " as a constant\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n');

  // Replaces all of the uses of a variable with uses of the constant.
  V->replaceAllUsesWith(Const);
  return true;
}
|
|
|
|
|
2020-06-17 16:40:47 +08:00
|
|
|
static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB,
|
2020-06-19 16:27:52 +08:00
|
|
|
SmallPtrSetImpl<Value *> &InsertedValues,
|
|
|
|
Statistic &InstRemovedStat,
|
|
|
|
Statistic &InstReplacedStat) {
|
2020-06-17 16:40:47 +08:00
|
|
|
bool MadeChanges = false;
|
|
|
|
for (Instruction &Inst : make_early_inc_range(BB)) {
|
|
|
|
if (Inst.getType()->isVoidTy())
|
|
|
|
continue;
|
|
|
|
if (tryToReplaceWithConstant(Solver, &Inst)) {
|
|
|
|
if (Inst.isSafeToRemove())
|
|
|
|
Inst.eraseFromParent();
|
|
|
|
// Hey, we just changed something!
|
|
|
|
MadeChanges = true;
|
|
|
|
++InstRemovedStat;
|
2020-06-19 16:27:52 +08:00
|
|
|
} else if (isa<SExtInst>(&Inst)) {
|
|
|
|
Value *ExtOp = Inst.getOperand(0);
|
|
|
|
if (isa<Constant>(ExtOp) || InsertedValues.count(ExtOp))
|
|
|
|
continue;
|
|
|
|
const ValueLatticeElement &IV = Solver.getLatticeValueFor(ExtOp);
|
|
|
|
if (!IV.isConstantRange(/*UndefAllowed=*/false))
|
|
|
|
continue;
|
|
|
|
if (IV.getConstantRange().isAllNonNegative()) {
|
|
|
|
auto *ZExt = new ZExtInst(ExtOp, Inst.getType(), "", &Inst);
|
|
|
|
InsertedValues.insert(ZExt);
|
|
|
|
Inst.replaceAllUsesWith(ZExt);
|
|
|
|
Solver.removeLatticeValueFor(&Inst);
|
|
|
|
Inst.eraseFromParent();
|
|
|
|
InstReplacedStat++;
|
|
|
|
MadeChanges = true;
|
|
|
|
}
|
2020-06-17 16:40:47 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return MadeChanges;
|
|
|
|
}
|
|
|
|
|
2016-05-18 23:18:25 +08:00
|
|
|
// runSCCP() - Run the Sparse Conditional Constant Propagation algorithm,
|
2004-11-15 12:44:20 +08:00
|
|
|
// and return true if the function was modified.
|
2016-05-18 23:18:25 +08:00
|
|
|
static bool runSCCP(Function &F, const DataLayout &DL,
|
|
|
|
const TargetLibraryInfo *TLI) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 11:09:36 +08:00
|
|
|
SCCPSolver Solver(
|
2020-03-14 00:40:03 +08:00
|
|
|
DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; },
|
|
|
|
F.getContext());
|
2004-11-15 12:44:20 +08:00
|
|
|
|
|
|
|
// Mark the first block of the function as being executable.
|
2015-10-14 03:26:58 +08:00
|
|
|
Solver.MarkBlockExecutable(&F.front());
|
2004-11-15 12:44:20 +08:00
|
|
|
|
2004-11-15 13:45:33 +08:00
|
|
|
// Mark all arguments to the function as being overdefined.
|
2015-10-14 03:26:58 +08:00
|
|
|
for (Argument &AI : F.args())
|
2017-03-08 09:26:37 +08:00
|
|
|
Solver.markOverdefined(&AI);
|
2004-11-15 13:45:33 +08:00
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
// Solve for constants.
|
2006-12-20 14:21:33 +08:00
|
|
|
bool ResolvedUndefs = true;
|
|
|
|
while (ResolvedUndefs) {
|
2004-12-11 04:41:50 +08:00
|
|
|
Solver.Solve();
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "RESOLVING UNDEFs\n");
|
2006-12-20 14:21:33 +08:00
|
|
|
ResolvedUndefs = Solver.ResolvedUndefsIn(F);
|
2004-12-11 04:41:50 +08:00
|
|
|
}
|
2004-11-15 12:44:20 +08:00
|
|
|
|
2004-11-15 13:45:33 +08:00
|
|
|
bool MadeChanges = false;
|
|
|
|
|
|
|
|
// If we decided that there are basic blocks that are dead in this function,
|
|
|
|
// delete their contents now. Note that we cannot actually delete the blocks,
|
|
|
|
// as we cannot modify the CFG of the function.
|
2007-03-04 12:50:21 +08:00
|
|
|
|
2020-06-19 16:27:52 +08:00
|
|
|
SmallPtrSet<Value *, 32> InsertedValues;
|
2016-06-26 20:28:59 +08:00
|
|
|
for (BasicBlock &BB : F) {
|
|
|
|
if (!Solver.isBlockExecutable(&BB)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB);
|
2016-01-24 14:26:47 +08:00
|
|
|
|
|
|
|
++NumDeadBlocks;
|
2016-06-26 20:28:59 +08:00
|
|
|
NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB);
|
2016-01-24 14:26:47 +08:00
|
|
|
|
2009-11-02 10:47:51 +08:00
|
|
|
MadeChanges = true;
|
|
|
|
continue;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-06-19 16:27:52 +08:00
|
|
|
MadeChanges |= simplifyInstsInBlock(Solver, BB, InsertedValues,
|
|
|
|
NumInstRemoved, NumInstReplaced);
|
2009-11-02 10:47:51 +08:00
|
|
|
}
|
2004-12-10 16:02:06 +08:00
|
|
|
|
|
|
|
return MadeChanges;
|
|
|
|
}
|
|
|
|
|
2016-08-09 08:28:15 +08:00
|
|
|
/// Run SCCP on \p F. If nothing changed, all analyses are preserved;
/// otherwise only GlobalsAA and the CFG analyses set are.
PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);

  const bool Changed = runSCCP(F, DL, &TLI);
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserve<GlobalsAA>();
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}
|
|
|
|
|
|
|
|
namespace {
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2016-05-18 23:18:25 +08:00
|
|
|
//===--------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// SCCP Class - This class uses the SCCPSolver to implement a per-function
|
|
|
|
/// Sparse Conditional Constant Propagator.
|
|
|
|
///
|
2016-05-19 23:58:02 +08:00
|
|
|
class SCCPLegacyPass : public FunctionPass {
|
|
|
|
public:
|
2017-10-21 05:47:29 +08:00
|
|
|
// Pass identification, replacement for typeid
|
|
|
|
static char ID;
|
|
|
|
|
|
|
|
SCCPLegacyPass() : FunctionPass(ID) {
|
|
|
|
initializeSCCPLegacyPassPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
|
2016-05-18 23:18:25 +08:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
|
|
AU.addRequired<TargetLibraryInfoWrapperPass>();
|
|
|
|
AU.addPreserved<GlobalsAAWrapperPass>();
|
2018-06-28 17:53:38 +08:00
|
|
|
AU.setPreservesCFG();
|
2016-05-18 23:18:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// runOnFunction - Run the Sparse Conditional Constant Propagation
|
|
|
|
// algorithm, and return true if the function was modified.
|
|
|
|
bool runOnFunction(Function &F) override {
|
|
|
|
if (skipFunction(F))
|
|
|
|
return false;
|
|
|
|
const DataLayout &DL = F.getParent()->getDataLayout();
|
|
|
|
const TargetLibraryInfo *TLI =
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 11:09:36 +08:00
|
|
|
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
|
2016-05-18 23:18:25 +08:00
|
|
|
return runSCCP(F, DL, TLI);
|
|
|
|
}
|
|
|
|
};
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2016-05-18 23:18:25 +08:00
|
|
|
} // end anonymous namespace
|
|
|
|
|
|
|
|
char SCCPLegacyPass::ID = 0;
|
2017-10-21 05:47:29 +08:00
|
|
|
|
2016-05-18 23:18:25 +08:00
|
|
|
INITIALIZE_PASS_BEGIN(SCCPLegacyPass, "sccp",
|
|
|
|
"Sparse Conditional Constant Propagation", false, false)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
|
|
|
|
INITIALIZE_PASS_END(SCCPLegacyPass, "sccp",
|
|
|
|
"Sparse Conditional Constant Propagation", false, false)
|
|
|
|
|
|
|
|
// createSCCPPass - This is the public interface to this file.
|
|
|
|
FunctionPass *llvm::createSCCPPass() { return new SCCPLegacyPass(); }
|
|
|
|
|
2016-07-21 04:17:13 +08:00
|
|
|
static void findReturnsToZap(Function &F,
|
2017-10-14 01:53:44 +08:00
|
|
|
SmallVector<ReturnInst *, 8> &ReturnsToZap,
|
|
|
|
SCCPSolver &Solver) {
|
2016-07-21 04:17:13 +08:00
|
|
|
// We can only do this if we know that nothing else can call the function.
|
2017-10-14 01:53:44 +08:00
|
|
|
if (!Solver.isArgumentTrackedFunction(&F))
|
2016-07-21 04:17:13 +08:00
|
|
|
return;
|
|
|
|
|
2018-03-01 09:19:18 +08:00
|
|
|
// There is a non-removable musttail call site of this function. Zapping
|
|
|
|
// returns is not allowed.
|
|
|
|
if (Solver.isMustTailCallee(&F)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't zap returns of the function : " << F.getName()
|
|
|
|
<< " due to present musttail call of it\n");
|
2018-03-01 09:19:18 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-07-31 20:57:04 +08:00
|
|
|
assert(
|
|
|
|
all_of(F.users(),
|
|
|
|
[&Solver](User *U) {
|
|
|
|
if (isa<Instruction>(U) &&
|
|
|
|
!Solver.isBlockExecutable(cast<Instruction>(U)->getParent()))
|
|
|
|
return true;
|
|
|
|
// Non-callsite uses are not impacted by zapping. Also, constant
|
|
|
|
// uses (like blockaddresses) could stuck around, without being
|
|
|
|
// used in the underlying IR, meaning we do not have lattice
|
|
|
|
// values for them.
|
2020-04-20 15:05:18 +08:00
|
|
|
if (!isa<CallBase>(U))
|
2019-07-31 20:57:04 +08:00
|
|
|
return true;
|
|
|
|
if (U->getType()->isStructTy()) {
|
2020-03-28 23:20:10 +08:00
|
|
|
return all_of(Solver.getStructLatticeValueFor(U),
|
|
|
|
[](const ValueLatticeElement &LV) {
|
|
|
|
return !isOverdefined(LV);
|
|
|
|
});
|
2019-07-31 20:57:04 +08:00
|
|
|
}
|
2020-03-14 00:40:03 +08:00
|
|
|
return !isOverdefined(Solver.getLatticeValueFor(U));
|
2019-07-31 20:57:04 +08:00
|
|
|
}) &&
|
|
|
|
"We can only zap functions where all live users have a concrete value");
|
|
|
|
|
2018-03-01 09:19:18 +08:00
|
|
|
for (BasicBlock &BB : F) {
|
|
|
|
if (CallInst *CI = BB.getTerminatingMustTailCall()) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't zap return of the block due to present "
|
|
|
|
<< "musttail call : " << *CI << "\n");
|
2018-03-01 19:31:44 +08:00
|
|
|
(void)CI;
|
2018-03-01 09:19:18 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-12-01 16:36:12 +08:00
|
|
|
if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
|
2016-07-21 04:17:13 +08:00
|
|
|
if (!isa<UndefValue>(RI->getOperand(0)))
|
|
|
|
ReturnsToZap.push_back(RI);
|
2018-03-01 09:19:18 +08:00
|
|
|
}
|
2016-07-21 04:17:13 +08:00
|
|
|
}
|
|
|
|
|
2018-09-20 17:00:17 +08:00
|
|
|
// Update the condition for terminators that are branching on indeterminate
|
|
|
|
// values, forcing them to use a specific edge.
|
|
|
|
static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) {
|
|
|
|
BasicBlock *Dest = nullptr;
|
|
|
|
Constant *C = nullptr;
|
|
|
|
if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
|
|
|
|
if (!isa<ConstantInt>(SI->getCondition())) {
|
|
|
|
// Indeterminate switch; use first case value.
|
|
|
|
Dest = SI->case_begin()->getCaseSuccessor();
|
|
|
|
C = SI->case_begin()->getCaseValue();
|
|
|
|
}
|
|
|
|
} else if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
|
|
|
|
if (!isa<ConstantInt>(BI->getCondition())) {
|
|
|
|
// Indeterminate branch; use false.
|
|
|
|
Dest = BI->getSuccessor(1);
|
|
|
|
C = ConstantInt::getFalse(BI->getContext());
|
|
|
|
}
|
|
|
|
} else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(I)) {
|
|
|
|
if (!isa<BlockAddress>(IBR->getAddress()->stripPointerCasts())) {
|
|
|
|
// Indeterminate indirectbr; use successor 0.
|
|
|
|
Dest = IBR->getSuccessor(0);
|
|
|
|
C = BlockAddress::get(IBR->getSuccessor(0));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
llvm_unreachable("Unexpected terminator instruction");
|
|
|
|
}
|
|
|
|
if (C) {
|
|
|
|
assert(Solver.isEdgeFeasible(I->getParent(), Dest) &&
|
|
|
|
"Didn't find feasible edge?");
|
|
|
|
(void)Dest;
|
|
|
|
|
|
|
|
I->setOperand(0, C);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-23 19:04:00 +08:00
|
|
|
bool llvm::runIPSCCP(
|
Change TargetLibraryInfo analysis passes to always require Function
Summary:
This is the first change to enable the TLI to be built per-function so
that -fno-builtin* handling can be migrated to use function attributes.
See discussion on D61634 for background. This is an enabler for fixing
handling of these options for LTO, for example.
This change should not affect behavior, as the provided function is not
yet used to build a specifically per-function TLI, but rather enables
that migration.
Most of the changes were very mechanical, e.g. passing a Function to the
legacy analysis pass's getTLI interface, or in Module level cases,
adding a callback. This is similar to the way the per-function TTI
analysis works.
There was one place where we were looking for builtins but not in the
context of a specific function. See FindCXAAtExit in
lib/Transforms/IPO/GlobalOpt.cpp. I'm somewhat concerned my workaround
could provide the wrong behavior in some corner cases. Suggestions
welcome.
Reviewers: chandlerc, hfinkel
Subscribers: arsenm, dschuff, jvesely, nhaehnle, mehdi_amini, javed.absar, sbc100, jgravelle-google, eraman, aheejin, steven_wu, george.burgess.iv, dexonsmith, jfb, asbirlea, gchatelet, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66428
llvm-svn: 371284
2019-09-07 11:09:36 +08:00
|
|
|
Module &M, const DataLayout &DL,
|
|
|
|
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
|
2018-11-09 19:52:27 +08:00
|
|
|
function_ref<AnalysisResultsForFn(Function &)> getAnalysis) {
|
2020-03-14 00:40:03 +08:00
|
|
|
SCCPSolver Solver(DL, GetTLI, M.getContext());
|
2004-12-10 16:02:06 +08:00
|
|
|
|
|
|
|
// Loop over all functions, marking arguments to those with their addresses
|
|
|
|
// taken or that are external as overdefined.
|
2016-05-15 04:59:09 +08:00
|
|
|
for (Function &F : M) {
|
|
|
|
if (F.isDeclaration())
|
2009-11-02 14:34:04 +08:00
|
|
|
continue;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2018-11-09 19:52:27 +08:00
|
|
|
Solver.addAnalysis(F, getAnalysis(F));
|
|
|
|
|
2017-10-14 01:53:44 +08:00
|
|
|
// Determine if we can track the function's return values. If so, add the
|
|
|
|
// function to the solver's set of return-tracked functions.
|
|
|
|
if (canTrackReturnsInterprocedurally(&F))
|
2016-05-15 04:59:09 +08:00
|
|
|
Solver.AddTrackedFunction(&F);
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2017-10-14 01:53:44 +08:00
|
|
|
// Determine if we can track the function's arguments. If so, add the
|
|
|
|
// function to the solver's set of argument-tracked functions.
|
|
|
|
if (canTrackArgumentsInterprocedurally(&F)) {
|
|
|
|
Solver.AddArgumentTrackedFunction(&F);
|
|
|
|
continue;
|
2009-11-04 03:24:51 +08:00
|
|
|
}
|
2009-11-03 11:42:51 +08:00
|
|
|
|
|
|
|
// Assume the function is called.
|
2016-05-15 04:59:09 +08:00
|
|
|
Solver.MarkBlockExecutable(&F.front());
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2009-11-03 11:42:51 +08:00
|
|
|
// Assume nothing about the incoming arguments.
|
2016-05-15 04:59:09 +08:00
|
|
|
for (Argument &AI : F.args())
|
2017-03-08 09:26:37 +08:00
|
|
|
Solver.markOverdefined(&AI);
|
2009-11-02 14:34:04 +08:00
|
|
|
}
|
2004-11-15 12:44:20 +08:00
|
|
|
|
2017-10-14 01:53:44 +08:00
|
|
|
// Determine if we can track any of the module's global variables. If so, add
|
|
|
|
// the global variables we can track to the solver's set of tracked global
|
|
|
|
// variables.
|
|
|
|
for (GlobalVariable &G : M.globals()) {
|
|
|
|
G.removeDeadConstantUsers();
|
|
|
|
if (canTrackGlobalVariableInterprocedurally(&G))
|
2015-10-14 03:26:58 +08:00
|
|
|
Solver.TrackValueOfGlobalVariable(&G);
|
2017-10-14 01:53:44 +08:00
|
|
|
}
|
2004-12-11 13:15:59 +08:00
|
|
|
|
2004-12-10 16:02:06 +08:00
|
|
|
// Solve for constants.
|
2006-12-20 14:21:33 +08:00
|
|
|
bool ResolvedUndefs = true;
|
[IPSCCP] Run Solve each time we resolved an undef in a function.
Once we resolved an undef in a function we can run Solve, which could
lead to finding a constant return value for the function, which in turn
could turn undefs into constants in other functions that call it, before
resolving undefs there.
Computationally the amount of work we are doing stays the same, just the
order we process things is slightly different and potentially there are
a few fewer undefs to resolve.
We are still relying on the order of functions in the IR, which means
depending on the order, we are able to resolve the optimal undef first
or not. For example, if @test1 comes before @testf, we find the constant
return value of @testf too late and we cannot use it while solving
@test1.
This on its own does not lead to more constants removed in the
test-suite, probably because currently we have to be very lucky to visit
applicable functions in the right order.
Maybe we manage to come up with a better way of resolving undefs in more
'profitable' functions first.
Reviewers: efriedma, mssimpso, davide
Reviewed By: efriedma, davide
Differential Revision: https://reviews.llvm.org/D49385
llvm-svn: 337283
2018-07-17 22:04:59 +08:00
|
|
|
Solver.Solve();
|
2006-12-20 14:21:33 +08:00
|
|
|
while (ResolvedUndefs) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "RESOLVING UNDEFS\n");
|
2006-12-20 14:21:33 +08:00
|
|
|
ResolvedUndefs = false;
|
2016-06-26 20:28:59 +08:00
|
|
|
for (Function &F : M)
|
[IPSCCP] Run Solve each time we resolved an undef in a function.
Once we resolved an undef in a function we can run Solve, which could
lead to finding a constant return value for the function, which in turn
could turn undefs into constants in other functions that call it, before
resolving undefs there.
Computationally the amount of work we are doing stays the same, just the
order we process things is slightly different and potentially there are
a few fewer undefs to resolve.
We are still relying on the order of functions in the IR, which means
depending on the order, we are able to resolve the optimal undef first
or not. For example, if @test1 comes before @testf, we find the constant
return value of @testf too late and we cannot use it while solving
@test1.
This on its own does not lead to more constants removed in the
test-suite, probably because currently we have to be very lucky to visit
applicable functions in the right order.
Maybe we manage to come up with a better way of resolving undefs in more
'profitable' functions first.
Reviewers: efriedma, mssimpso, davide
Reviewed By: efriedma, davide
Differential Revision: https://reviews.llvm.org/D49385
llvm-svn: 337283
2018-07-17 22:04:59 +08:00
|
|
|
if (Solver.ResolvedUndefsIn(F)) {
|
|
|
|
// We run Solve() after we resolved an undef in a function, because
|
|
|
|
// we might deduce a fact that eliminates an undef in another function.
|
|
|
|
Solver.Solve();
|
|
|
|
ResolvedUndefs = true;
|
|
|
|
}
|
2004-12-11 04:41:50 +08:00
|
|
|
}
|
2004-12-10 16:02:06 +08:00
|
|
|
|
|
|
|
bool MadeChanges = false;
|
|
|
|
|
|
|
|
// Iterate over all of the instructions in the module, replacing them with
|
2004-11-15 12:44:20 +08:00
|
|
|
// constants if we have found them to be of constant values.
|
2007-02-03 05:15:06 +08:00
|
|
|
|
2016-06-26 20:28:59 +08:00
|
|
|
for (Function &F : M) {
|
|
|
|
if (F.isDeclaration())
|
2015-10-14 03:26:58 +08:00
|
|
|
continue;
|
|
|
|
|
2018-11-09 19:52:27 +08:00
|
|
|
SmallVector<BasicBlock *, 512> BlocksToErase;
|
|
|
|
|
2017-10-13 04:52:34 +08:00
|
|
|
if (Solver.isBlockExecutable(&F.front()))
|
2016-06-26 20:28:59 +08:00
|
|
|
for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;
|
Recommit r315288: [SCCP] Propagate integer range info for parameters in IPSCCP.
This version of the patch includes a fix addressing a stage2 LTO buildbot
failure and addressed some additional nits.
Original commit message:
This updates the SCCP solver to use the ValueElement lattice for
parameters, which provides integer range information. The range
information is used to remove unneeded icmp instructions.
For the following function, f() can be optimized to ret i32 2 with
this change
source_filename = "sccp.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readnone uwtable
define i32 @main() local_unnamed_addr #0 {
entry:
%call = tail call fastcc i32 @f(i32 1)
%call1 = tail call fastcc i32 @f(i32 47)
%add3 = add nsw i32 %call, %call1
ret i32 %add3
}
; Function Attrs: noinline norecurse nounwind readnone uwtable
define internal fastcc i32 @f(i32 %x) unnamed_addr #1 {
entry:
%c1 = icmp sle i32 %x, 100
%cmp = icmp sgt i32 %x, 300
%. = select i1 %cmp, i32 1, i32 2
ret i32 %.
}
attributes #1 = { noinline }
Reviewers: davide, sanjoy, efriedma, dberlin
Reviewed By: davide, dberlin
Subscribers: mcrosier, gberry, mssimpso, dberlin, llvm-commits
Differential Revision: https://reviews.llvm.org/D36656
llvm-svn: 316891
2017-10-30 18:07:42 +08:00
|
|
|
++AI) {
|
2017-11-21 08:21:52 +08:00
|
|
|
if (!AI->use_empty() && tryToReplaceWithConstant(Solver, &*AI)) {
|
2016-07-14 09:27:29 +08:00
|
|
|
++IPNumArgsElimed;
|
2017-11-21 08:21:52 +08:00
|
|
|
continue;
|
|
|
|
}
|
Recommit r315288: [SCCP] Propagate integer range info for parameters in IPSCCP.
This version of the patch includes a fix addressing a stage2 LTO buildbot
failure and addressed some additional nits.
Original commit message:
This updates the SCCP solver to use the ValueElement lattice for
parameters, which provides integer range information. The range
information is used to remove unneeded icmp instructions.
For the following function, f() can be optimized to ret i32 2 with
this change
source_filename = "sccp.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readnone uwtable
define i32 @main() local_unnamed_addr #0 {
entry:
%call = tail call fastcc i32 @f(i32 1)
%call1 = tail call fastcc i32 @f(i32 47)
%add3 = add nsw i32 %call, %call1
ret i32 %add3
}
; Function Attrs: noinline norecurse nounwind readnone uwtable
define internal fastcc i32 @f(i32 %x) unnamed_addr #1 {
entry:
%c1 = icmp sle i32 %x, 100
%cmp = icmp sgt i32 %x, 300
%. = select i1 %cmp, i32 1, i32 2
ret i32 %.
}
attributes #1 = { noinline }
Reviewers: davide, sanjoy, efriedma, dberlin
Reviewed By: davide, dberlin
Subscribers: mcrosier, gberry, mssimpso, dberlin, llvm-commits
Differential Revision: https://reviews.llvm.org/D36656
llvm-svn: 316891
2017-10-30 18:07:42 +08:00
|
|
|
}
|
|
|
|
|
2020-06-19 16:27:52 +08:00
|
|
|
SmallPtrSet<Value *, 32> InsertedValues;
|
2020-06-17 16:40:47 +08:00
|
|
|
for (BasicBlock &BB : F) {
|
|
|
|
if (!Solver.isBlockExecutable(&BB)) {
|
|
|
|
LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB);
|
2016-01-24 14:26:47 +08:00
|
|
|
++NumDeadBlocks;
|
|
|
|
|
|
|
|
MadeChanges = true;
|
2004-12-11 06:29:08 +08:00
|
|
|
|
2020-06-17 16:40:47 +08:00
|
|
|
if (&BB != &F.front())
|
|
|
|
BlocksToErase.push_back(&BB);
|
2009-11-02 10:47:51 +08:00
|
|
|
continue;
|
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-06-19 16:27:52 +08:00
|
|
|
MadeChanges |= simplifyInstsInBlock(Solver, BB, InsertedValues,
|
|
|
|
IPNumInstRemoved, IPNumInstReplaced);
|
2009-11-02 10:47:51 +08:00
|
|
|
}
|
2004-12-11 06:29:08 +08:00
|
|
|
|
2018-11-12 04:22:45 +08:00
|
|
|
DomTreeUpdater DTU = Solver.getDTU(F);
|
2018-11-09 19:52:27 +08:00
|
|
|
// Change dead blocks to unreachable. We do it after replacing constants
|
|
|
|
// in all executable blocks, because changeToUnreachable may remove PHI
|
|
|
|
// nodes in executable blocks we found values for. The function's entry
|
|
|
|
// block is not part of BlocksToErase, so we have to handle it separately.
|
|
|
|
for (BasicBlock *BB : BlocksToErase) {
|
2018-06-26 18:15:02 +08:00
|
|
|
NumInstRemoved +=
|
2018-11-09 19:52:27 +08:00
|
|
|
changeToUnreachable(BB->getFirstNonPHI(), /*UseLLVMTrap=*/false,
|
|
|
|
/*PreserveLCSSA=*/false, &DTU);
|
|
|
|
}
|
2018-06-26 18:15:02 +08:00
|
|
|
if (!Solver.isBlockExecutable(&F.front()))
|
|
|
|
NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(),
|
2018-11-09 19:52:27 +08:00
|
|
|
/*UseLLVMTrap=*/false,
|
|
|
|
/*PreserveLCSSA=*/false, &DTU);
|
2018-06-26 18:15:02 +08:00
|
|
|
|
2018-11-09 19:52:27 +08:00
|
|
|
// Now that all instructions in the function are constant folded,
|
|
|
|
// use ConstantFoldTerminator to get rid of in-edges, record DT updates and
|
|
|
|
// delete dead BBs.
|
|
|
|
for (BasicBlock *DeadBB : BlocksToErase) {
|
2004-12-11 06:29:08 +08:00
|
|
|
// If there are any PHI nodes in this successor, drop entries for BB now.
|
2014-03-09 11:16:01 +08:00
|
|
|
for (Value::user_iterator UI = DeadBB->user_begin(),
|
|
|
|
UE = DeadBB->user_end();
|
|
|
|
UI != UE;) {
|
2009-11-24 00:13:39 +08:00
|
|
|
// Grab the user and then increment the iterator early, as the user
|
|
|
|
// will be deleted. Step past all adjacent uses from the same user.
|
2016-12-01 16:36:12 +08:00
|
|
|
auto *I = dyn_cast<Instruction>(*UI);
|
2009-11-24 00:13:39 +08:00
|
|
|
do { ++UI; } while (UI != UE && *UI == I);
|
|
|
|
|
2009-11-21 04:19:14 +08:00
|
|
|
// Ignore blockaddress users; BasicBlock's dtor will handle them.
|
|
|
|
if (!I) continue;
|
|
|
|
|
2018-09-20 17:00:17 +08:00
|
|
|
// If we have forced an edge for an indeterminate value, then force the
|
|
|
|
// terminator to fold to that edge.
|
|
|
|
forceIndeterminateEdge(I, Solver);
|
2019-05-08 17:09:54 +08:00
|
|
|
BasicBlock *InstBB = I->getParent();
|
|
|
|
bool Folded = ConstantFoldTerminator(InstBB,
|
2018-11-09 19:52:27 +08:00
|
|
|
/*DeleteDeadConditions=*/false,
|
|
|
|
/*TLI=*/nullptr, &DTU);
|
2018-01-08 06:09:44 +08:00
|
|
|
assert(Folded &&
|
|
|
|
"Expect TermInst on constantint or blockaddress to be folded");
|
|
|
|
(void) Folded;
|
2019-05-08 17:09:54 +08:00
|
|
|
// If we folded the terminator to an unconditional branch to another
|
|
|
|
// dead block, replace it with Unreachable, to avoid trying to fold that
|
|
|
|
// branch again.
|
|
|
|
BranchInst *BI = cast<BranchInst>(InstBB->getTerminator());
|
|
|
|
if (BI && BI->isUnconditional() &&
|
|
|
|
!Solver.isBlockExecutable(BI->getSuccessor(0))) {
|
|
|
|
InstBB->getTerminator()->eraseFromParent();
|
|
|
|
new UnreachableInst(InstBB->getContext(), InstBB);
|
|
|
|
}
|
2004-12-11 06:29:08 +08:00
|
|
|
}
|
2018-11-09 19:52:27 +08:00
|
|
|
// Mark dead BB for deletion.
|
|
|
|
DTU.deleteBB(DeadBB);
|
2004-12-11 06:29:08 +08:00
|
|
|
}
|
2018-08-23 19:04:00 +08:00
|
|
|
|
|
|
|
for (BasicBlock &BB : F) {
|
|
|
|
for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
|
|
|
|
Instruction *Inst = &*BI++;
|
2018-08-23 19:17:59 +08:00
|
|
|
if (Solver.getPredicateInfoFor(Inst)) {
|
2018-08-23 19:04:00 +08:00
|
|
|
if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
|
|
|
|
if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
|
|
|
|
Value *Op = II->getOperand(0);
|
|
|
|
Inst->replaceAllUsesWith(Op);
|
|
|
|
Inst->eraseFromParent();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2004-12-10 16:02:06 +08:00
|
|
|
}
|
2004-12-11 10:53:57 +08:00
|
|
|
|
|
|
|
// If we inferred constant or undef return values for a function, we replaced
|
|
|
|
// all call uses with the inferred value. This means we don't need to bother
|
|
|
|
// actually returning anything from the function. Replace all return
|
|
|
|
// instructions with return undef.
|
2010-02-27 08:07:42 +08:00
|
|
|
//
|
|
|
|
// Do this in two stages: first identify the functions we should process, then
|
|
|
|
// actually zap their returns. This is important because we can only do this
|
2010-02-27 15:50:40 +08:00
|
|
|
// if the address of the function isn't taken. In cases where a return is the
|
2010-02-27 08:07:42 +08:00
|
|
|
// last use of a function, the order of processing functions would affect
|
2010-02-27 15:50:40 +08:00
|
|
|
// whether other functions are optimizable.
|
2010-02-27 08:07:42 +08:00
|
|
|
SmallVector<ReturnInst*, 8> ReturnsToZap;
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2020-03-28 23:20:10 +08:00
|
|
|
for (const auto &I : Solver.getTrackedRetVals()) {
|
2016-06-26 20:28:59 +08:00
|
|
|
Function *F = I.first;
|
2020-03-14 00:40:03 +08:00
|
|
|
if (isOverdefined(I.second) || F->getReturnType()->isVoidTy())
|
2009-11-03 11:42:51 +08:00
|
|
|
continue;
|
2017-10-14 01:53:44 +08:00
|
|
|
findReturnsToZap(*F, ReturnsToZap, Solver);
|
2016-07-21 04:17:13 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2019-12-23 02:20:17 +08:00
|
|
|
for (auto F : Solver.getMRVFunctionsTracked()) {
|
2016-07-21 04:17:13 +08:00
|
|
|
assert(F->getReturnType()->isStructTy() &&
|
|
|
|
"The return type should be a struct");
|
|
|
|
StructType *STy = cast<StructType>(F->getReturnType());
|
|
|
|
if (Solver.isStructLatticeConstant(F, STy))
|
2017-10-14 01:53:44 +08:00
|
|
|
findReturnsToZap(*F, ReturnsToZap, Solver);
|
2010-02-27 08:07:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Zap all returns which we've identified as needing to be changed.
|
|
|
|
for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
|
|
|
|
Function *F = ReturnsToZap[i]->getParent()->getParent();
|
|
|
|
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
|
2009-11-03 11:42:51 +08:00
|
|
|
}
|
2012-01-19 05:16:33 +08:00
|
|
|
|
2012-03-28 08:35:33 +08:00
|
|
|
// If we inferred constant or undef values for global variables, we can
|
|
|
|
// delete the global and any stores that remain to it.
|
2020-03-28 23:20:10 +08:00
|
|
|
for (auto &I : make_early_inc_range(Solver.getTrackedGlobals())) {
|
|
|
|
GlobalVariable *GV = I.first;
|
|
|
|
if (isOverdefined(I.second))
|
2020-03-14 00:40:03 +08:00
|
|
|
continue;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Found that GV '" << GV->getName()
|
|
|
|
<< "' is constant!\n");
|
2004-12-11 13:15:59 +08:00
|
|
|
while (!GV->use_empty()) {
|
2014-03-09 11:16:01 +08:00
|
|
|
StoreInst *SI = cast<StoreInst>(GV->user_back());
|
2004-12-11 13:15:59 +08:00
|
|
|
SI->eraseFromParent();
|
2020-06-05 05:33:03 +08:00
|
|
|
MadeChanges = true;
|
2004-12-11 13:15:59 +08:00
|
|
|
}
|
|
|
|
M.getGlobalList().erase(GV);
|
2004-12-11 14:05:53 +08:00
|
|
|
++IPNumGlobalConst;
|
2004-12-11 13:15:59 +08:00
|
|
|
}
|
2005-04-22 07:48:37 +08:00
|
|
|
|
2004-11-15 12:44:20 +08:00
|
|
|
return MadeChanges;
|
|
|
|
}
|