llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

4137 lines
154 KiB
C++
Raw Normal View History

//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// DependenceAnalysis is an LLVM pass that analyses dependences between memory
// accesses. Currently, it is an (incomplete) implementation of the approach
// described in
//
// Practical Dependence Testing
// Goff, Kennedy, Tseng
// PLDI 1991
//
// There's a single entry point that analyzes the dependence between a pair
// of memory references in a function, returning either NULL, for no dependence,
// or a more-or-less detailed description of the dependence between them.
//
// Currently, the implementation cannot propagate constraints between
// coupled RDIV subscripts and lacks a multi-subscript MIV test.
// Both of these are conservative weaknesses;
// that is, not a source of correctness problems.
//
DA: remove uses of GEP, only ask SCEV It's been quite some time the Dependence Analysis (DA) is broken, as it uses the GEP representation to "identify" multi-dimensional arrays. It even wrongly detects multi-dimensional arrays in single nested loops: from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6 ;; for (long int i = 0; i < 50; i++) { ;; A[i][3*i - 6] = i; ;; *B++ = A[i][i]; DA used to detect two subscripts, which makes no sense in the LLVM IR or in C/C++ semantics, as there are no guarantees as in Fortran of subscripts not overlapping into a next array dimension: maximum nesting levels = 1 SrcPtrSCEV = %A DstPtrSCEV = %A using GEPs subscript 0 src = {0,+,1}<nuw><nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} subscript 1 src = {-6,+,3}<nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} Separable = {} Coupled = {1} With the current patch, DA will correctly work on only one dimension: maximum nesting levels = 1 SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body> DstSCEV = {%A,+,404}<%for.body> subscript 0 src = {(-2424 + %A)<nsw>,+,1212}<%for.body> dst = {%A,+,404}<%for.body> class = 1 loops = {1} Separable = {0} Coupled = {} This change removes all uses of GEP from DA, and we now only rely on the SCEV representation. The patch does not turn on -da-delinearize by default, and so the DA analysis will be more conservative in the case of multi-dimensional memory accesses in nested loops. I disabled some interchange tests, as the DA is not able to disambiguate the dependence anymore. To make DA stronger, we may need to compute a bound on the number of iterations based on the access functions and array dimensions. The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to avoid checking for snippets of LLVM IR: this form of checking is very hard to maintain. Instead, we now check for output of the pass that are more meaningful than dozens of lines of LLVM IR. Some tests now require -debug messages and thus only enabled with asserts. Patch written by Sebastian Pop and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D35430 llvm-svn: 326837
2018-03-07 05:55:59 +08:00
// Since Clang linearizes some array subscripts, the dependence
// analysis is using SCEV->delinearize to recover the representation of multiple
// subscripts, and thus avoid the more expensive and less precise MIV tests. The
// delinearization is controlled by the flag -da-delinearize.
//
// We should pay some careful attention to the possibility of integer overflow
// in the implementation of the various tests. This could happen with Add,
// Subtract, or Multiply, with both APInt's and SCEV's.
//
// Some non-linear subscript pairs can be handled by the GCD test
// (and perhaps other tests).
// Should explore how often these things occur.
//
// Finally, it seems like certain test cases expose weaknesses in the SCEV
// simplification, especially in the handling of sign and zero extensions.
// It could be useful to spend time exploring these.
//
// Please note that this is work in progress and the interface is subject to
// change.
//
//===----------------------------------------------------------------------===//
// //
// In memory of Ken Kennedy, 1945 - 2007 //
// //
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "da"
//===----------------------------------------------------------------------===//
// statistics
STATISTIC(TotalArrayPairs, "Array pairs tested");
STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
STATISTIC(ZIVapplications, "ZIV applications");
STATISTIC(ZIVindependence, "ZIV independence");
STATISTIC(StrongSIVapplications, "Strong SIV applications");
STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
STATISTIC(StrongSIVindependence, "Strong SIV independence");
STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
STATISTIC(ExactSIVapplications, "Exact SIV applications");
STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
STATISTIC(ExactSIVindependence, "Exact SIV independence");
STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
STATISTIC(DeltaApplications, "Delta applications");
STATISTIC(DeltaSuccesses, "Delta successes");
STATISTIC(DeltaIndependence, "Delta independence");
STATISTIC(DeltaPropagations, "Delta propagations");
STATISTIC(GCDapplications, "GCD applications");
STATISTIC(GCDsuccesses, "GCD successes");
STATISTIC(GCDindependence, "GCD independence");
STATISTIC(BanerjeeApplications, "Banerjee applications");
STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
static cl::opt<bool>
Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Try to delinearize array references."));
[DA] Add an option to control delinearization validity checks Summary: Dependence Analysis performs static checks to confirm validity of delinearization. These checks often fail for 64-bit targets due to type conversions and integer wrapping that prevent simplification of the SCEV expressions. These checks would also fail at compile-time if the lower bound of the loops are compile-time unknown. For example: void foo(int n, int m, int a[][m]) { for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) { a[i][j] = a[i+1][j-2]; } } opt -mem2reg -instcombine -indvars -loop-simplify -loop-rotate -inline -pass-remarks=.* -debug-pass=Arguments -da-permissive-validity-checks=false k3.ll -analyze -da will produce the following by default: da analyze - anti [* *|<]! but will produce the following expected dependence vector if the validity checks are disabled: da analyze - consistent anti [1 -2]! This revision will introduce a debug option that will leave the validity checks in place by default, but allow them to be turned off. New tests are added for cases where it cannot be proven at compile-time that the individual subscripts stay in-bound with respect to a particular dimension of an array. These tests enable the option to provide user guarantee that the subscripts do not over/under-flow into other dimensions, thereby producing more accurate dependence vectors. For prior discussion on this topic, leading to this change, please see the following thread: http://lists.llvm.org/pipermail/llvm-dev/2019-May/132372.html Reviewers: Meinersbur, jdoerfert, kbarton, dmgreen, fhahn Reviewed By: Meinersbur, jdoerfert, dmgreen Subscribers: fhahn, hiraditya, javed.absar, llvm-commits, Whitney, etiotto Tag: LLVM Differential Revision: https://reviews.llvm.org/D62610 llvm-svn: 362711
2019-06-06 23:12:49 +08:00
static cl::opt<bool> DisableDelinearizationChecks(
"da-disable-delinearization-checks", cl::init(false), cl::Hidden,
cl::ZeroOrMore,
cl::desc(
"Disable checks that try to statically verify validity of "
"delinearized subscripts. Enabling this option may result in incorrect "
"dependence vectors for languages that allow the subscript of one "
"dimension to underflow or overflow into another dimension."));
static cl::opt<unsigned> MIVMaxLevelThreshold(
"da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore,
cl::desc("Maximum depth allowed for the recursive algorithm used to "
"explore MIV direction vectors."));
//===----------------------------------------------------------------------===//
// basics
DependenceAnalysis::Result
DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
auto &AA = FAM.getResult<AAManager>(F);
auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
auto &LI = FAM.getResult<LoopAnalysis>(F);
return DependenceInfo(&F, &AA, &SE, &LI);
}
[PM] Change the static object whose address is used to uniquely identify analyses to have a common type which is enforced rather than using a char object and a `void *` type when used as an identifier. This has a number of advantages. First, it at least helps some of the confusion raised in Justin Lebar's code review of why `void *` was being used everywhere by having a stronger type that connects to documentation about this. However, perhaps more importantly, it addresses a serious issue where the alignment of these pointer-like identifiers was unknown. This made it hard to use them in pointer-like data structures. We were already dodging this in dangerous ways to create the "all analyses" entry. In a subsequent patch I attempted to use these with TinyPtrVector and things fell apart in a very bad way. And it isn't just a compile time or type system issue. Worse than that, the actual alignment of these pointer-like opaque identifiers wasn't guaranteed to be a useful alignment as they were just characters. This change introduces a type to use as the "key" object whose address forms the opaque identifier. This both forces the objects to have proper alignment, and provides type checking that we get it right everywhere. It also makes the types somewhat less mysterious than `void *`. We could go one step further and introduce a truly opaque pointer-like type to return from the `ID()` static function rather than returning `AnalysisKey *`, but that didn't seem to be a clear win so this is just the initial change to get to a reliably typed and aligned object serving is a key for all the analyses. Thanks to Richard Smith and Justin Lebar for helping pick plausible names and avoid making this refactoring many times. =] And thanks to Sean for the super fast review! While here, I've tried to move away from the "PassID" nomenclature entirely as it wasn't really helping and is overloaded with old pass manager constructs. Now we have IDs for analyses, and key objects whose address can be used as IDs. Where possible and clear I've shortened this to just "ID". In a few places I kept "AnalysisID" to make it clear what was being identified. Differential Revision: https://reviews.llvm.org/D27031 llvm-svn: 287783
2016-11-24 01:53:26 +08:00
AnalysisKey DependenceAnalysis::Key;
INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
"Dependence Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
[PM] Port ScalarEvolution to the new pass manager. This change makes ScalarEvolution a stand-alone object and just produces one from a pass as needed. Making this work well requires making the object movable, using references instead of overwritten pointers in a number of places, and other refactorings. I've also wired it up to the new pass manager and added a RUN line to a test to exercise it under the new pass manager. This includes basic printing support much like with other analyses. But there is a big and somewhat scary change here. Prior to this patch ScalarEvolution was never *actually* invalidated!!! Re-running the pass just re-wired up the various other analyses and didn't remove any of the existing entries in the SCEV caches or clear out anything at all. This might seem OK as everything in SCEV that can uses ValueHandles to track updates to the values that serve as SCEV keys. However, this still means that as we ran SCEV over each function in the module, we kept accumulating more and more SCEVs into the cache. At the end, we would have a SCEV cache with every value that we ever needed a SCEV for in the entire module!!! Yowzers. The releaseMemory routine would dump all of this, but that isn't realy called during normal runs of the pipeline as far as I can see. To make matters worse, there *is* actually a key that we don't update with value handles -- there is a map keyed off of Loop*s. Because LoopInfo *does* release its memory from run to run, it is entirely possible to run SCEV over one function, then over another function, and then lookup a Loop* from the second function but find an entry inserted for the first function! Ouch. To make matters still worse, there are plenty of updates that *don't* trip a value handle. It seems incredibly unlikely that today GVN or another pass that invalidates SCEV can update values in *just* such a way that a subsequent run of SCEV will incorrectly find lookups in a cache, but it is theoretically possible and would be a nightmare to debug. With this refactoring, I've fixed all this by actually destroying and recreating the ScalarEvolution object from run to run. Technically, this could increase the amount of malloc traffic we see, but then again it is also technically correct. ;] I don't actually think we're suffering from tons of malloc traffic from SCEV because if we were, the fact that we never clear the memory would seem more likely to have come up as an actual problem before now. So, I've made the simple fix here. If in fact there are serious issues with too much allocation and deallocation, I can work on a clever fix that preserves the allocations (while clearing the data) between each run, but I'd prefer to do that kind of optimization with a test case / benchmark that shows why we need such cleverness (and that can test that we actually make it faster). It's possible that this will make some things faster by making the SCEV caches have higher locality (due to being significantly smaller) so until there is a clear benchmark, I think the simple change is best. Differential Revision: http://reviews.llvm.org/D12063 llvm-svn: 245193
2015-08-17 10:08:17 +08:00
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible with the new pass manager, and no longer relying on analysis groups. This builds essentially a ground-up new AA infrastructure stack for LLVM. The core ideas are the same that are used throughout the new pass manager: type erased polymorphism and direct composition. The design is as follows: - FunctionAAResults is a type-erasing alias analysis results aggregation interface to walk a single query across a range of results from different alias analyses. Currently this is function-specific as we always assume that aliasing queries are *within* a function. - AAResultBase is a CRTP utility providing stub implementations of various parts of the alias analysis result concept, notably in several cases in terms of other more general parts of the interface. This can be used to implement only a narrow part of the interface rather than the entire interface. This isn't really ideal, this logic should be hoisted into FunctionAAResults as currently it will cause a significant amount of redundant work, but it faithfully models the behavior of the prior infrastructure. - All the alias analysis passes are ported to be wrapper passes for the legacy PM and new-style analysis passes for the new PM with a shared result object. In some cases (most notably CFL), this is an extremely naive approach that we should revisit when we can specialize for the new pass manager. - BasicAA has been restructured to reflect that it is much more fundamentally a function analysis because it uses dominator trees and loop info that need to be constructed for each function. All of the references to getting alias analysis results have been updated to use the new aggregation interface. All the preservation and other pass management code has been updated accordingly. The way the FunctionAAResultsWrapperPass works is to detect the available alias analyses when run, and add them to the results object. This means that we should be able to continue to respect when various passes are added to the pipeline, for example adding CFL or adding TBAA passes should just cause their results to be available and to get folded into this. The exception to this rule is BasicAA which really needs to be a function pass due to using dominator trees and loop info. As a consequence, the FunctionAAResultsWrapperPass directly depends on BasicAA and always includes it in the aggregation. This has significant implications for preserving analyses. Generally, most passes shouldn't bother preserving FunctionAAResultsWrapperPass because rebuilding the results just updates the set of known AA passes. The exception to this rule are LoopPass instances which need to preserve all the function analyses that the loop pass manager will end up needing. This means preserving both BasicAAWrapperPass and the aggregating FunctionAAResultsWrapperPass. Now, when preserving an alias analysis, you do so by directly preserving that analysis. This is only necessary for non-immutable-pass-provided alias analyses though, and there are only three of interest: BasicAA, GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is preserved when needed because it (like DominatorTree and LoopInfo) is marked as a CFG-only pass. I've expanded GlobalsAA into the preserved set everywhere we previously were preserving all of AliasAnalysis, and I've added SCEVAA in the intersection of that with where we preserve SCEV itself. One significant challenge to all of this is that the CGSCC passes were actually using the alias analysis implementations by taking advantage of a pretty amazing set of loop holes in the old pass manager's analysis management code which allowed analysis groups to slide through in many cases. Moving away from analysis groups makes this problem much more obvious. To fix it, I've leveraged the flexibility the design of the new PM components provides to just directly construct the relevant alias analyses for the relevant functions in the IPO passes that need them. This is a bit hacky, but should go away with the new pass manager, and is already in many ways cleaner than the prior state. Another significant challenge is that various facilities of the old alias analysis infrastructure just don't fit any more. The most significant of these is the alias analysis 'counter' pass. That pass relied on the ability to snoop on AA queries at different points in the analysis group chain. Instead, I'm planning to build printing functionality directly into the aggregation layer. I've not included that in this patch merely to keep it smaller. Note that all of this needs a nearly complete rewrite of the AA documentation. I'm planning to do that, but I'd like to make sure the new design settles, and to flesh out a bit more of what it looks like in the new pass manager first. Differential Revision: http://reviews.llvm.org/D12080 llvm-svn: 247167
2015-09-10 01:55:00 +08:00
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
true, true)
char DependenceAnalysisWrapperPass::ID = 0;
DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
: FunctionPass(ID) {
initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
}
FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
return new DependenceAnalysisWrapperPass();
}
bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
info.reset(new DependenceInfo(&F, &AA, &SE, &LI));
return false;
}
DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible with the new pass manager, and no longer relying on analysis groups. This builds essentially a ground-up new AA infrastructure stack for LLVM. The core ideas are the same that are used throughout the new pass manager: type erased polymorphism and direct composition. The design is as follows: - FunctionAAResults is a type-erasing alias analysis results aggregation interface to walk a single query across a range of results from different alias analyses. Currently this is function-specific as we always assume that aliasing queries are *within* a function. - AAResultBase is a CRTP utility providing stub implementations of various parts of the alias analysis result concept, notably in several cases in terms of other more general parts of the interface. This can be used to implement only a narrow part of the interface rather than the entire interface. This isn't really ideal, this logic should be hoisted into FunctionAAResults as currently it will cause a significant amount of redundant work, but it faithfully models the behavior of the prior infrastructure. - All the alias analysis passes are ported to be wrapper passes for the legacy PM and new-style analysis passes for the new PM with a shared result object. In some cases (most notably CFL), this is an extremely naive approach that we should revisit when we can specialize for the new pass manager. - BasicAA has been restructured to reflect that it is much more fundamentally a function analysis because it uses dominator trees and loop info that need to be constructed for each function. All of the references to getting alias analysis results have been updated to use the new aggregation interface. All the preservation and other pass management code has been updated accordingly. The way the FunctionAAResultsWrapperPass works is to detect the available alias analyses when run, and add them to the results object. This means that we should be able to continue to respect when various passes are added to the pipeline, for example adding CFL or adding TBAA passes should just cause their results to be available and to get folded into this. The exception to this rule is BasicAA which really needs to be a function pass due to using dominator trees and loop info. As a consequence, the FunctionAAResultsWrapperPass directly depends on BasicAA and always includes it in the aggregation. This has significant implications for preserving analyses. Generally, most passes shouldn't bother preserving FunctionAAResultsWrapperPass because rebuilding the results just updates the set of known AA passes. The exception to this rule are LoopPass instances which need to preserve all the function analyses that the loop pass manager will end up needing. This means preserving both BasicAAWrapperPass and the aggregating FunctionAAResultsWrapperPass. Now, when preserving an alias analysis, you do so by directly preserving that analysis. This is only necessary for non-immutable-pass-provided alias analyses though, and there are only three of interest: BasicAA, GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is preserved when needed because it (like DominatorTree and LoopInfo) is marked as a CFG-only pass. I've expanded GlobalsAA into the preserved set everywhere we previously were preserving all of AliasAnalysis, and I've added SCEVAA in the intersection of that with where we preserve SCEV itself. One significant challenge to all of this is that the CGSCC passes were actually using the alias analysis implementations by taking advantage of a pretty amazing set of loop holes in the old pass manager's analysis management code which allowed analysis groups to slide through in many cases. Moving away from analysis groups makes this problem much more obvious. To fix it, I've leveraged the flexibility the design of the new PM components provides to just directly construct the relevant alias analyses for the relevant functions in the IPO passes that need them. This is a bit hacky, but should go away with the new pass manager, and is already in many ways cleaner than the prior state. Another significant challenge is that various facilities of the old alias analysis infrastructure just don't fit any more. The most significant of these is the alias analysis 'counter' pass. That pass relied on the ability to snoop on AA queries at different points in the analysis group chain. Instead, I'm planning to build printing functionality directly into the aggregation layer. I've not included that in this patch merely to keep it smaller. Note that all of this needs a nearly complete rewrite of the AA documentation. I'm planning to do that, but I'd like to make sure the new design settles, and to flesh out a bit more of what it looks like in the new pass manager first. Differential Revision: http://reviews.llvm.org/D12080 llvm-svn: 247167
2015-09-10 01:55:00 +08:00
AU.addRequiredTransitive<AAResultsWrapperPass>();
[PM] Port ScalarEvolution to the new pass manager. This change makes ScalarEvolution a stand-alone object and just produces one from a pass as needed. Making this work well requires making the object movable, using references instead of overwritten pointers in a number of places, and other refactorings. I've also wired it up to the new pass manager and added a RUN line to a test to exercise it under the new pass manager. This includes basic printing support much like with other analyses. But there is a big and somewhat scary change here. Prior to this patch ScalarEvolution was never *actually* invalidated!!! Re-running the pass just re-wired up the various other analyses and didn't remove any of the existing entries in the SCEV caches or clear out anything at all. This might seem OK as everything in SCEV that can uses ValueHandles to track updates to the values that serve as SCEV keys. However, this still means that as we ran SCEV over each function in the module, we kept accumulating more and more SCEVs into the cache. At the end, we would have a SCEV cache with every value that we ever needed a SCEV for in the entire module!!! Yowzers. The releaseMemory routine would dump all of this, but that isn't realy called during normal runs of the pipeline as far as I can see. To make matters worse, there *is* actually a key that we don't update with value handles -- there is a map keyed off of Loop*s. Because LoopInfo *does* release its memory from run to run, it is entirely possible to run SCEV over one function, then over another function, and then lookup a Loop* from the second function but find an entry inserted for the first function! Ouch. To make matters still worse, there are plenty of updates that *don't* trip a value handle. It seems incredibly unlikely that today GVN or another pass that invalidates SCEV can update values in *just* such a way that a subsequent run of SCEV will incorrectly find lookups in a cache, but it is theoretically possible and would be a nightmare to debug. With this refactoring, I've fixed all this by actually destroying and recreating the ScalarEvolution object from run to run. Technically, this could increase the amount of malloc traffic we see, but then again it is also technically correct. ;] I don't actually think we're suffering from tons of malloc traffic from SCEV because if we were, the fact that we never clear the memory would seem more likely to have come up as an actual problem before now. So, I've made the simple fix here. If in fact there are serious issues with too much allocation and deallocation, I can work on a clever fix that preserves the allocations (while clearing the data) between each run, but I'd prefer to do that kind of optimization with a test case / benchmark that shows why we need such cleverness (and that can test that we actually make it faster). It's possible that this will make some things faster by making the SCEV caches have higher locality (due to being significantly smaller) so until there is a clear benchmark, I think the simple change is best. Differential Revision: http://reviews.llvm.org/D12063 llvm-svn: 245193
2015-08-17 10:08:17 +08:00
AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
// Used to test the dependence analyzer.
// Looks through the function, noting instructions that may access memory.
// Calls depends() on every possible pair and prints out the result.
// Ignores all other instructions.
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) {
auto *F = DA->getFunction();
for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
++SrcI) {
if (SrcI->mayReadOrWriteMemory()) {
for (inst_iterator DstI = SrcI, DstE = inst_end(F);
DstI != DstE; ++DstI) {
if (DstI->mayReadOrWriteMemory()) {
OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
OS << " da analyze - ";
if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
D->dump(OS);
for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
if (D->isSplitable(Level)) {
OS << " da analyze - split level = " << Level;
OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
OS << "!\n";
}
}
}
else
OS << "none!\n";
}
}
}
}
}
void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
const Module *) const {
dumpExampleDependence(OS, info.get());
}
PreservedAnalyses
DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F));
return PreservedAnalyses::all();
}
//===----------------------------------------------------------------------===//
// Dependence methods
// Returns true if this is an input dependence.
bool Dependence::isInput() const {
return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
}
// Returns true if this is an output dependence.
bool Dependence::isOutput() const {
return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
}
// Returns true if this is an flow (aka true) dependence.
bool Dependence::isFlow() const {
return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
}
// Returns true if this is an anti dependence.
bool Dependence::isAnti() const {
return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
}
// Returns true if a particular level is scalar; that is,
// if no subscript in the source or destination mention the induction
// variable associated with the loop at this level.
// Leave this out of line, so it will serve as a virtual method anchor
bool Dependence::isScalar(unsigned level) const {
return false;
}
//===----------------------------------------------------------------------===//
// FullDependence methods
2015-03-05 09:25:19 +08:00
FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
bool PossiblyLoopIndependent,
2015-03-05 09:25:19 +08:00
unsigned CommonLevels)
: Dependence(Source, Destination), Levels(CommonLevels),
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
if (CommonLevels)
DV = std::make_unique<DVEntry[]>(CommonLevels);
}
// The rest are simple getters that hide the implementation.
// getDirection - Returns the direction associated with a particular level.
unsigned FullDependence::getDirection(unsigned Level) const {
assert(0 < Level && Level <= Levels && "Level out of range");
return DV[Level - 1].Direction;
}
// Returns the distance (or NULL) associated with a particular level.
const SCEV *FullDependence::getDistance(unsigned Level) const {
assert(0 < Level && Level <= Levels && "Level out of range");
return DV[Level - 1].Distance;
}
// Returns true if a particular level is scalar; that is,
// if no subscript in the source or destination mention the induction
// variable associated with the loop at this level.
bool FullDependence::isScalar(unsigned Level) const {
assert(0 < Level && Level <= Levels && "Level out of range");
return DV[Level - 1].Scalar;
}
// Returns true if peeling the first iteration from this loop
// will break this dependence.
bool FullDependence::isPeelFirst(unsigned Level) const {
assert(0 < Level && Level <= Levels && "Level out of range");
return DV[Level - 1].PeelFirst;
}
// Returns true if peeling the last iteration from this loop
// will break this dependence.
bool FullDependence::isPeelLast(unsigned Level) const {
assert(0 < Level && Level <= Levels && "Level out of range");
return DV[Level - 1].PeelLast;
}
// Returns true if splitting this loop will break the dependence.
bool FullDependence::isSplitable(unsigned Level) const {
assert(0 < Level && Level <= Levels && "Level out of range");
return DV[Level - 1].Splitable;
}
//===----------------------------------------------------------------------===//
// DependenceInfo::Constraint methods
// If constraint is a point <X, Y>, returns X.
// Otherwise assert.
const SCEV *DependenceInfo::Constraint::getX() const {
assert(Kind == Point && "Kind should be Point");
return A;
}
// If constraint is a point <X, Y>, returns Y.
// Otherwise assert.
const SCEV *DependenceInfo::Constraint::getY() const {
assert(Kind == Point && "Kind should be Point");
return B;
}
// If constraint is a line AX + BY = C, returns A.
// Otherwise assert.
const SCEV *DependenceInfo::Constraint::getA() const {
assert((Kind == Line || Kind == Distance) &&
"Kind should be Line (or Distance)");
return A;
}
// If constraint is a line AX + BY = C, returns B.
// Otherwise assert.
const SCEV *DependenceInfo::Constraint::getB() const {
assert((Kind == Line || Kind == Distance) &&
"Kind should be Line (or Distance)");
return B;
}
// If constraint is a line AX + BY = C, returns C.
// Otherwise assert.
const SCEV *DependenceInfo::Constraint::getC() const {
assert((Kind == Line || Kind == Distance) &&
"Kind should be Line (or Distance)");
return C;
}
// If constraint is a distance, returns D.
// Otherwise assert.
const SCEV *DependenceInfo::Constraint::getD() const {
assert(Kind == Distance && "Kind should be Distance");
return SE->getNegativeSCEV(C);
}
// Returns the loop associated with this constraint.
const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
assert((Kind == Distance || Kind == Line || Kind == Point) &&
"Kind should be Distance, Line, or Point");
return AssociatedLoop;
}
void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
const Loop *CurLoop) {
Kind = Point;
A = X;
B = Y;
AssociatedLoop = CurLoop;
}
void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
const SCEV *CC, const Loop *CurLoop) {
Kind = Line;
A = AA;
B = BB;
C = CC;
AssociatedLoop = CurLoop;
}
void DependenceInfo::Constraint::setDistance(const SCEV *D,
const Loop *CurLoop) {
Kind = Distance;
A = SE->getOne(D->getType());
B = SE->getNegativeSCEV(A);
C = SE->getNegativeSCEV(D);
AssociatedLoop = CurLoop;
}
void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
SE = NewSE;
Kind = Any;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// For debugging purposes. Dumps the constraint out to OS.
LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
if (isEmpty())
OS << " Empty\n";
else if (isAny())
OS << " Any\n";
else if (isPoint())
OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
else if (isDistance())
OS << " Distance is " << *getD() <<
" (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
else if (isLine())
OS << " Line is " << *getA() << "*X + " <<
*getB() << "*Y = " << *getC() << "\n";
else
llvm_unreachable("unknown constraint type in Constraint::dump");
}
#endif
// Updates X with the intersection
// of the Constraints X and Y. Returns true if X has changed.
// Corresponds to Figure 4 from the paper
//
// Practical Dependence Testing
// Goff, Kennedy, Tseng
// PLDI 1991
bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
++DeltaApplications;
LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
assert(!Y->isPoint() && "Y must not be a Point");
if (X->isAny()) {
if (Y->isAny())
return false;
*X = *Y;
return true;
}
if (X->isEmpty())
return false;
if (Y->isEmpty()) {
X->setEmpty();
return true;
}
if (X->isDistance() && Y->isDistance()) {
LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
return false;
if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
X->setEmpty();
++DeltaSuccesses;
return true;
}
// Hmmm, interesting situation.
// I guess if either is constant, keep it and ignore the other.
if (isa<SCEVConstant>(Y->getD())) {
*X = *Y;
return true;
}
return false;
}
// At this point, the pseudo-code in Figure 4 of the paper
// checks if (X->isPoint() && Y->isPoint()).
// This case can't occur in our implementation,
// since a Point can only arise as the result of intersecting
// two Line constraints, and the right-hand value, Y, is never
// the result of an intersection.
assert(!(X->isPoint() && Y->isPoint()) &&
"We shouldn't ever see X->isPoint() && Y->isPoint()");
if (X->isLine() && Y->isLine()) {
LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n");
const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
// slopes are equal, so lines are parallel
LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
Prod1 = SE->getMulExpr(X->getC(), Y->getB());
Prod2 = SE->getMulExpr(X->getB(), Y->getC());
if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
return false;
if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
X->setEmpty();
++DeltaSuccesses;
return true;
}
return false;
}
if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
// slopes differ, so lines intersect
LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
const SCEVConstant *C1A2_C2A1 =
dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
const SCEVConstant *C1B2_C2B1 =
dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
const SCEVConstant *A1B2_A2B1 =
dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
const SCEVConstant *A2B1_A1B2 =
dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
if (!C1B2_C2B1 || !C1A2_C2A1 ||
!A1B2_A2B1 || !A2B1_A1B2)
return false;
APInt Xtop = C1B2_C2B1->getAPInt();
APInt Xbot = A1B2_A2B1->getAPInt();
APInt Ytop = C1A2_C2A1->getAPInt();
APInt Ybot = A2B1_A1B2->getAPInt();
LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
APInt Xq = Xtop; // these need to be initialized, even
APInt Xr = Xtop; // though they're just going to be overwritten
APInt::sdivrem(Xtop, Xbot, Xq, Xr);
APInt Yq = Ytop;
APInt Yr = Ytop;
APInt::sdivrem(Ytop, Ybot, Yq, Yr);
if (Xr != 0 || Yr != 0) {
X->setEmpty();
++DeltaSuccesses;
return true;
}
LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
if (Xq.slt(0) || Yq.slt(0)) {
X->setEmpty();
++DeltaSuccesses;
return true;
}
if (const SCEVConstant *CUB =
collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
const APInt &UpperBound = CUB->getAPInt();
LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
X->setEmpty();
++DeltaSuccesses;
return true;
}
}
X->setPoint(SE->getConstant(Xq),
SE->getConstant(Yq),
X->getAssociatedLoop());
++DeltaSuccesses;
return true;
}
return false;
}
// if (X->isLine() && Y->isPoint()) This case can't occur.
assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
if (X->isPoint() && Y->isLine()) {
LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n");
const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
return false;
if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
X->setEmpty();
++DeltaSuccesses;
return true;
}
return false;
}
llvm_unreachable("shouldn't reach the end of Constraint intersection");
return false;
}
//===----------------------------------------------------------------------===//
// DependenceInfo methods
// For debugging purposes. Dumps a dependence to OS.
void Dependence::dump(raw_ostream &OS) const {
bool Splitable = false;
if (isConfused())
OS << "confused";
else {
if (isConsistent())
OS << "consistent ";
if (isFlow())
OS << "flow";
else if (isOutput())
OS << "output";
else if (isAnti())
OS << "anti";
else if (isInput())
OS << "input";
unsigned Levels = getLevels();
OS << " [";
for (unsigned II = 1; II <= Levels; ++II) {
if (isSplitable(II))
Splitable = true;
if (isPeelFirst(II))
OS << 'p';
const SCEV *Distance = getDistance(II);
if (Distance)
OS << *Distance;
else if (isScalar(II))
OS << "S";
else {
unsigned Direction = getDirection(II);
if (Direction == DVEntry::ALL)
OS << "*";
else {
if (Direction & DVEntry::LT)
OS << "<";
if (Direction & DVEntry::EQ)
OS << "=";
if (Direction & DVEntry::GT)
OS << ">";
}
}
if (isPeelLast(II))
OS << 'p';
if (II < Levels)
OS << " ";
}
if (isLoopIndependent())
OS << "|<";
OS << "]";
if (Splitable)
OS << " splitable";
}
OS << "!\n";
}
// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
// underlaying objects. If LocA and LocB are known to not alias (for any reason:
// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
// Otherwise the underlying objects are checked to see if they point to
// different identifiable objects.
static AliasResult underlyingObjectsAlias(AAResults *AA,
const DataLayout &DL,
const MemoryLocation &LocA,
const MemoryLocation &LocB) {
// Check the original locations (minus size) for noalias, which can happen for
// tbaa, incompatible underlying object locations, etc.
MemoryLocation LocAS =
MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags);
MemoryLocation LocBS =
MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags);
if (AA->isNoAlias(LocAS, LocBS))
return AliasResult::NoAlias;
// Check the underlying objects are the same
const Value *AObj = getUnderlyingObject(LocA.Ptr);
const Value *BObj = getUnderlyingObject(LocB.Ptr);
// If the underlying objects are the same, they must alias
if (AObj == BObj)
return AliasResult::MustAlias;
// We may have hit the recursion limit for underlying objects, or have
// underlying objects where we don't know they will alias.
if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))
return AliasResult::MayAlias;
// Otherwise we know the objects are different and both identified objects so
// must not alias.
return AliasResult::NoAlias;
}
// Returns true if the load or store can be analyzed. Atomic and volatile
// operations have properties which this analysis does not understand.
static
bool isLoadOrStore(const Instruction *I) {
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->isUnordered();
else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isUnordered();
return false;
}
// Examines the loop nesting of the Src and Dst
// instructions and establishes their shared loops. Sets the variables
// CommonLevels, SrcLevels, and MaxLevels.
// The source and destination instructions needn't be contained in the same
// loop. The routine establishNestingLevels finds the level of most deeply
// nested loop that contains them both, CommonLevels. An instruction that's
// not contained in a loop is at level = 0. MaxLevels is equal to the level
// of the source plus the level of the destination, minus CommonLevels.
// This lets us allocate vectors MaxLevels in length, with room for every
// distinct loop referenced in both the source and destination subscripts.
// The variable SrcLevels is the nesting depth of the source instruction.
// It's used to help calculate distinct loops referenced by the destination.
// Here's the map from loops to levels:
// 0 - unused
// 1 - outermost common loop
// ... - other common loops
// CommonLevels - innermost common loop
// ... - loops containing Src but not Dst
// SrcLevels - innermost loop containing Src but not Dst
// ... - loops containing Dst but not Src
// MaxLevels - innermost loops containing Dst but not Src
// Consider the follow code fragment:
// for (a = ...) {
// for (b = ...) {
// for (c = ...) {
// for (d = ...) {
// A[] = ...;
// }
// }
// for (e = ...) {
// for (f = ...) {
// for (g = ...) {
// ... = A[];
// }
// }
// }
// }
// }
// If we're looking at the possibility of a dependence between the store
// to A (the Src) and the load from A (the Dst), we'll note that they
// have 2 loops in common, so CommonLevels will equal 2 and the direction
// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
// A map from loop names to loop numbers would look like
// a - 1
// b - 2 = CommonLevels
// c - 3
// d - 4 = SrcLevels
// e - 5
// f - 6
// g - 7 = MaxLevels
void DependenceInfo::establishNestingLevels(const Instruction *Src,
const Instruction *Dst) {
const BasicBlock *SrcBlock = Src->getParent();
const BasicBlock *DstBlock = Dst->getParent();
unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
unsigned DstLevel = LI->getLoopDepth(DstBlock);
const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
const Loop *DstLoop = LI->getLoopFor(DstBlock);
SrcLevels = SrcLevel;
MaxLevels = SrcLevel + DstLevel;
while (SrcLevel > DstLevel) {
SrcLoop = SrcLoop->getParentLoop();
SrcLevel--;
}
while (DstLevel > SrcLevel) {
DstLoop = DstLoop->getParentLoop();
DstLevel--;
}
while (SrcLoop != DstLoop) {
SrcLoop = SrcLoop->getParentLoop();
DstLoop = DstLoop->getParentLoop();
SrcLevel--;
}
CommonLevels = SrcLevel;
MaxLevels -= CommonLevels;
}
// Given one of the loops containing the source, return
// its level index in our numbering scheme.
unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
return SrcLoop->getLoopDepth();
}
// Given one of the loops containing the destination,
// return its level index in our numbering scheme.
unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
unsigned D = DstLoop->getLoopDepth();
if (D > CommonLevels)
return D - CommonLevels + SrcLevels;
else
return D;
}
// Returns true if Expression is loop invariant in LoopNest.
bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
const Loop *LoopNest) const {
if (!LoopNest)
return true;
return SE->isLoopInvariant(Expression, LoopNest) &&
isLoopInvariant(Expression, LoopNest->getParentLoop());
}
// Finds the set of loops from the LoopNest that
// have a level <= CommonLevels and are referred to by the SCEV Expression.
void DependenceInfo::collectCommonLoops(const SCEV *Expression,
const Loop *LoopNest,
SmallBitVector &Loops) const {
while (LoopNest) {
unsigned Level = LoopNest->getLoopDepth();
if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
Loops.set(Level);
LoopNest = LoopNest->getParentLoop();
}
}
void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
unsigned widestWidthSeen = 0;
Type *widestType;
// Go through each pair and find the widest bit to which we need
// to extend all of them.
for (Subscript *Pair : Pairs) {
const SCEV *Src = Pair->Src;
const SCEV *Dst = Pair->Dst;
IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
if (SrcTy == nullptr || DstTy == nullptr) {
assert(SrcTy == DstTy && "This function only unify integer types and "
"expect Src and Dst share the same type "
"otherwise.");
continue;
}
if (SrcTy->getBitWidth() > widestWidthSeen) {
widestWidthSeen = SrcTy->getBitWidth();
widestType = SrcTy;
}
if (DstTy->getBitWidth() > widestWidthSeen) {
widestWidthSeen = DstTy->getBitWidth();
widestType = DstTy;
}
}
assert(widestWidthSeen > 0);
// Now extend each pair to the widest seen.
for (Subscript *Pair : Pairs) {
const SCEV *Src = Pair->Src;
const SCEV *Dst = Pair->Dst;
IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
if (SrcTy == nullptr || DstTy == nullptr) {
assert(SrcTy == DstTy && "This function only unify integer types and "
"expect Src and Dst share the same type "
"otherwise.");
continue;
}
if (SrcTy->getBitWidth() < widestWidthSeen)
// Sign-extend Src to widestType
Pair->Src = SE->getSignExtendExpr(Src, widestType);
if (DstTy->getBitWidth() < widestWidthSeen) {
// Sign-extend Dst to widestType
Pair->Dst = SE->getSignExtendExpr(Dst, widestType);
}
}
}
// removeMatchingExtensions - Examines a subscript pair.
// If the source and destination are identically sign (or zero)
// extended, it strips off the extension in an effect to simplify
// the actual analysis.
void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
const SCEV *Src = Pair->Src;
const SCEV *Dst = Pair->Dst;
if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
(isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src);
const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst);
const SCEV *SrcCastOp = SrcCast->getOperand();
const SCEV *DstCastOp = DstCast->getOperand();
if (SrcCastOp->getType() == DstCastOp->getType()) {
Pair->Src = SrcCastOp;
Pair->Dst = DstCastOp;
}
}
}
// Examine the scev and return true iff it's linear.
// Collect any loops mentioned in the set of "Loops".
bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
SmallBitVector &Loops, bool IsSrc) {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
return isLoopInvariant(Expr, LoopNest);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(UB)) {
if (SE->getTypeSizeInBits(Start->getType()) <
SE->getTypeSizeInBits(UB->getType())) {
if (!AddRec->getNoWrapFlags())
return false;
}
}
if (!isLoopInvariant(Step, LoopNest))
return false;
if (IsSrc)
Loops.set(mapSrcLoop(AddRec->getLoop()));
else
Loops.set(mapDstLoop(AddRec->getLoop()));
return checkSubscript(Start, LoopNest, Loops, IsSrc);
}
// Examine the scev and return true iff it's linear.
// Collect any loops mentioned in the set of "Loops".
bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
SmallBitVector &Loops) {
return checkSubscript(Src, LoopNest, Loops, true);
}
// Examine the scev and return true iff it's linear.
// Collect any loops mentioned in the set of "Loops".
bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
SmallBitVector &Loops) {
return checkSubscript(Dst, LoopNest, Loops, false);
}
// Examines the subscript pair (the Src and Dst SCEVs)
// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
// Collects the associated loops in a set.
DependenceInfo::Subscript::ClassificationKind
DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
const SCEV *Dst, const Loop *DstLoopNest,
SmallBitVector &Loops) {
SmallBitVector SrcLoops(MaxLevels + 1);
SmallBitVector DstLoops(MaxLevels + 1);
if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
return Subscript::NonLinear;
if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
return Subscript::NonLinear;
Loops = SrcLoops;
Loops |= DstLoops;
unsigned N = Loops.count();
if (N == 0)
return Subscript::ZIV;
if (N == 1)
return Subscript::SIV;
if (N == 2 && (SrcLoops.count() == 0 ||
DstLoops.count() == 0 ||
(SrcLoops.count() == 1 && DstLoops.count() == 1)))
return Subscript::RDIV;
return Subscript::MIV;
}
// A wrapper around SCEV::isKnownPredicate.
// Looks for cases where we're interested in comparing for equality.
// If both X and Y have been identically sign or zero extended,
// it strips off the (confusing) extensions before invoking
// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
// will be similarly updated.
//
// If SCEV::isKnownPredicate can't prove the predicate,
// we try simple subtraction, which seems to help in some cases
// involving symbolics.
bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
const SCEV *Y) const {
if (Pred == CmpInst::ICMP_EQ ||
Pred == CmpInst::ICMP_NE) {
if ((isa<SCEVSignExtendExpr>(X) &&
isa<SCEVSignExtendExpr>(Y)) ||
(isa<SCEVZeroExtendExpr>(X) &&
isa<SCEVZeroExtendExpr>(Y))) {
const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X);
const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y);
const SCEV *Xop = CX->getOperand();
const SCEV *Yop = CY->getOperand();
if (Xop->getType() == Yop->getType()) {
X = Xop;
Y = Yop;
}
}
}
if (SE->isKnownPredicate(Pred, X, Y))
return true;
// If SE->isKnownPredicate can't prove the condition,
// we try the brute-force approach of subtracting
// and testing the difference.
// By testing with SE->isKnownPredicate first, we avoid
// the possibility of overflow when the arguments are constants.
const SCEV *Delta = SE->getMinusSCEV(X, Y);
switch (Pred) {
case CmpInst::ICMP_EQ:
return Delta->isZero();
case CmpInst::ICMP_NE:
return SE->isKnownNonZero(Delta);
case CmpInst::ICMP_SGE:
return SE->isKnownNonNegative(Delta);
case CmpInst::ICMP_SLE:
return SE->isKnownNonPositive(Delta);
case CmpInst::ICMP_SGT:
return SE->isKnownPositive(Delta);
case CmpInst::ICMP_SLT:
return SE->isKnownNegative(Delta);
default:
llvm_unreachable("unexpected predicate in isKnownPredicate");
}
}
/// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
/// with some extra checking if S is an AddRec and we can prove less-than using
/// the loop bounds.
bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
// First unify to the same type
auto *SType = dyn_cast<IntegerType>(S->getType());
auto *SizeType = dyn_cast<IntegerType>(Size->getType());
if (!SType || !SizeType)
return false;
Type *MaxType =
(SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
S = SE->getTruncateOrZeroExtend(S, MaxType);
Size = SE->getTruncateOrZeroExtend(Size, MaxType);
// Special check for addrecs using BE taken count
const SCEV *Bound = SE->getMinusSCEV(S, Size);
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
if (AddRec->isAffine()) {
const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(BECount)) {
const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
if (SE->isKnownNegative(Limit))
return true;
}
}
}
// Check using normal isKnownNegative
const SCEV *LimitedBound =
SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
return SE->isKnownNegative(LimitedBound);
}
bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {
bool Inbounds = false;
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
Inbounds = SrcGEP->isInBounds();
if (Inbounds) {
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
if (AddRec->isAffine()) {
// We know S is for Ptr, the operand on a load/store, so doesn't wrap.
// If both parts are NonNegative, the end result will be NonNegative
if (SE->isKnownNonNegative(AddRec->getStart()) &&
SE->isKnownNonNegative(AddRec->getOperand(1)))
return true;
}
}
}
return SE->isKnownNonNegative(S);
}
// All subscripts are all the same type.
// Loop bound may be smaller (e.g., a char).
// Should zero extend loop bound, since it's always >= 0.
// This routine collects upper bound and extends or truncates if needed.
// Truncating is safe when subscripts are known not to wrap. Cases without
// nowrap flags should have been rejected earlier.
// Return null if no bound available.
const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
const SCEV *UB = SE->getBackedgeTakenCount(L);
return SE->getTruncateOrZeroExtend(UB, T);
}
return nullptr;
}
// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
// If the cast fails, returns NULL.
const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
Type *T) const {
if (const SCEV *UB = collectUpperBound(L, T))
return dyn_cast<SCEVConstant>(UB);
return nullptr;
}
// testZIV -
// When we have a pair of subscripts of the form [c1] and [c2],
// where c1 and c2 are both loop invariant, we attack it using
// the ZIV test. Basically, we test by comparing the two values,
// but there are actually three possible results:
// 1) the values are equal, so there's a dependence
// 2) the values are different, so there's no dependence
// 3) the values might be equal, so we have to assume a dependence.
//
// Return true if dependence disproved.
bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
FullDependence &Result) const {
LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
++ZIVapplications;
if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
LLVM_DEBUG(dbgs() << " provably dependent\n");
return false; // provably dependent
}
if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
LLVM_DEBUG(dbgs() << " provably independent\n");
++ZIVindependence;
return true; // provably independent
}
LLVM_DEBUG(dbgs() << " possibly dependent\n");
Result.Consistent = false;
return false; // possibly dependent
}
// strongSIVtest -
// From the paper, Practical Dependence Testing, Section 4.2.1
//
// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
// where i is an induction variable, c1 and c2 are loop invariant,
// and a is a constant, we can solve it exactly using the Strong SIV test.
//
// Can prove independence. Failing that, can compute distance (and direction).
// In the presence of symbolic terms, we can sometimes make progress.
//
// If there's a dependence,
//
// c1 + a*i = c2 + a*i'
//
// The dependence distance is
//
// d = i' - i = (c1 - c2)/a
//
// A dependence only exists if d is an integer and abs(d) <= U, where U is the
// loop's upper bound. If a dependence exists, the dependence direction is
// defined as
//
// { < if d > 0
// direction = { = if d = 0
// { > if d < 0
//
// Return true if dependence disproved.
bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
const SCEV *DstConst, const Loop *CurLoop,
unsigned Level, FullDependence &Result,
Constraint &NewConstraint) const {
LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
++StrongSIVapplications;
assert(0 < Level && Level <= CommonLevels && "level out of range");
Level--;
const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
// check that |Delta| < iteration count
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
const SCEV *AbsDelta =
SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
const SCEV *AbsCoeff =
SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
// Distance greater than trip count - no dependence
++StrongSIVindependence;
++StrongSIVsuccesses;
return true;
}
}
// Can we compute distance?
if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
APInt Distance = ConstDelta; // these need to be initialized
APInt Remainder = ConstDelta;
APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
// Make sure Coeff divides Delta exactly
if (Remainder != 0) {
// Coeff doesn't divide Distance, no dependence
++StrongSIVindependence;
++StrongSIVsuccesses;
return true;
}
Result.DV[Level].Distance = SE->getConstant(Distance);
NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
if (Distance.sgt(0))
Result.DV[Level].Direction &= Dependence::DVEntry::LT;
else if (Distance.slt(0))
Result.DV[Level].Direction &= Dependence::DVEntry::GT;
else
Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
++StrongSIVsuccesses;
}
else if (Delta->isZero()) {
// since 0/X == 0
Result.DV[Level].Distance = Delta;
NewConstraint.setDistance(Delta, CurLoop);
Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
++StrongSIVsuccesses;
}
else {
if (Coeff->isOne()) {
LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
Result.DV[Level].Distance = Delta; // since X/1 == X
NewConstraint.setDistance(Delta, CurLoop);
}
else {
Result.Consistent = false;
NewConstraint.setLine(Coeff,
SE->getNegativeSCEV(Coeff),
SE->getNegativeSCEV(Delta), CurLoop);
}
// maybe we can get a useful direction
bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
// The double negatives above are confusing.
// It helps to read !SE->isKnownNonZero(Delta)
// as "Delta might be Zero"
unsigned NewDirection = Dependence::DVEntry::NONE;
if ((DeltaMaybePositive && CoeffMaybePositive) ||
(DeltaMaybeNegative && CoeffMaybeNegative))
NewDirection = Dependence::DVEntry::LT;
if (DeltaMaybeZero)
NewDirection |= Dependence::DVEntry::EQ;
if ((DeltaMaybeNegative && CoeffMaybePositive) ||
(DeltaMaybePositive && CoeffMaybeNegative))
NewDirection |= Dependence::DVEntry::GT;
if (NewDirection < Result.DV[Level].Direction)
++StrongSIVsuccesses;
Result.DV[Level].Direction &= NewDirection;
}
return false;
}
// weakCrossingSIVtest -
// From the paper, Practical Dependence Testing, Section 4.2.2
//
// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
// where i is an induction variable, c1 and c2 are loop invariant,
// and a is a constant, we can solve it exactly using the
// Weak-Crossing SIV test.
//
// Given c1 + a*i = c2 - a*i', we can look for the intersection of
// the two lines, where i = i', yielding
//
// c1 + a*i = c2 - a*i
// 2a*i = c2 - c1
// i = (c2 - c1)/2a
//
// If i < 0, there is no dependence.
// If i > upperbound, there is no dependence.
// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
// If i = upperbound, there's a dependence with distance = 0.
// If i is integral, there's a dependence (all directions).
// If the non-integer part = 1/2, there's a dependence (<> directions).
// Otherwise, there's no dependence.
//
// Can prove independence. Failing that,
// can sometimes refine the directions.
// Can determine iteration for splitting.
//
// Return true if dependence disproved.
bool DependenceInfo::weakCrossingSIVtest(
const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
const Loop *CurLoop, unsigned Level, FullDependence &Result,
Constraint &NewConstraint, const SCEV *&SplitIter) const {
LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++WeakCrossingSIVapplications;
assert(0 < Level && Level <= CommonLevels && "Level out of range");
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
if (Delta->isZero()) {
Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
++WeakCrossingSIVsuccesses;
if (!Result.DV[Level].Direction) {
++WeakCrossingSIVindependence;
return true;
}
Result.DV[Level].Distance = Delta; // = 0
return false;
}
const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
if (!ConstCoeff)
return false;
Result.DV[Level].Splitable = true;
if (SE->isKnownNegative(ConstCoeff)) {
ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
assert(ConstCoeff &&
"dynamic cast of negative of ConstCoeff should yield constant");
Delta = SE->getNegativeSCEV(Delta);
}
assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
// compute SplitIter for use by DependenceInfo::getSplitIteration()
SplitIter = SE->getUDivExpr(
SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
if (!ConstDelta)
return false;
// We're certain that ConstCoeff > 0; therefore,
// if Delta < 0, then no dependence.
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
if (SE->isKnownNegative(Delta)) {
// No dependence, Delta < 0
++WeakCrossingSIVindependence;
++WeakCrossingSIVsuccesses;
return true;
}
// We're certain that Delta > 0 and ConstCoeff > 0.
// Check Delta/(2*ConstCoeff) against upper loop bound
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
ConstantTwo);
LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
// Delta too big, no dependence
++WeakCrossingSIVindependence;
++WeakCrossingSIVsuccesses;
return true;
}
if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
// i = i' = UB
Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
++WeakCrossingSIVsuccesses;
if (!Result.DV[Level].Direction) {
++WeakCrossingSIVindependence;
return true;
}
Result.DV[Level].Splitable = false;
Result.DV[Level].Distance = SE->getZero(Delta->getType());
return false;
}
}
// check that Coeff divides Delta
APInt APDelta = ConstDelta->getAPInt();
APInt APCoeff = ConstCoeff->getAPInt();
APInt Distance = APDelta; // these need to be initialzed
APInt Remainder = APDelta;
APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
if (Remainder != 0) {
// Coeff doesn't divide Delta, no dependence
++WeakCrossingSIVindependence;
++WeakCrossingSIVsuccesses;
return true;
}
LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
// if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
APInt Two = APInt(Distance.getBitWidth(), 2, true);
Remainder = Distance.srem(Two);
LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
if (Remainder != 0) {
// Equal direction isn't possible
Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
++WeakCrossingSIVsuccesses;
}
return false;
}
// Kirch's algorithm, from
//
// Optimizing Supercompilers for Supercomputers
// Michael Wolfe
// MIT Press, 1989
//
// Program 2.1, page 29.
// Computes the GCD of AM and BM.
// Also finds a solution to the equation ax - by = gcd(a, b).
// Returns true if dependence disproved; i.e., gcd does not divide Delta.
static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
APInt A0(Bits, 1, true), A1(Bits, 0, true);
APInt B0(Bits, 0, true), B1(Bits, 1, true);
APInt G0 = AM.abs();
APInt G1 = BM.abs();
APInt Q = G0; // these need to be initialized
APInt R = G0;
APInt::sdivrem(G0, G1, Q, R);
while (R != 0) {
APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
G0 = G1; G1 = R;
APInt::sdivrem(G0, G1, Q, R);
}
G = G1;
LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
X = AM.slt(0) ? -A1 : A1;
Y = BM.slt(0) ? B1 : -B1;
// make sure gcd divides Delta
R = Delta.srem(G);
if (R != 0)
return true; // gcd doesn't divide Delta, no dependence
Q = Delta.sdiv(G);
return false;
}
static APInt floorOfQuotient(const APInt &A, const APInt &B) {
APInt Q = A; // these need to be initialized
APInt R = A;
APInt::sdivrem(A, B, Q, R);
if (R == 0)
return Q;
if ((A.sgt(0) && B.sgt(0)) ||
(A.slt(0) && B.slt(0)))
return Q;
else
return Q - 1;
}
static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
APInt Q = A; // these need to be initialized
APInt R = A;
APInt::sdivrem(A, B, Q, R);
if (R == 0)
return Q;
if ((A.sgt(0) && B.sgt(0)) ||
(A.slt(0) && B.slt(0)))
return Q + 1;
else
return Q;
}
// exactSIVtest -
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
// where i is an induction variable, c1 and c2 are loop invariant, and a1
// and a2 are constant, we can solve it exactly using an algorithm developed
// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
//
// Dependence Analysis for Supercomputing
// Utpal Banerjee
// Kluwer Academic Publishers, 1988
//
// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
// so use them if possible. They're also a bit better with symbolics and,
// in the case of the strong SIV test, can compute Distances.
//
// Return true if dependence disproved.
//
// This is a modified version of the original Banerjee algorithm. The original
// only tested whether Dst depends on Src. This algorithm extends that and
// returns all the dependencies that exist between Dst and Src.
bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *SrcConst, const SCEV *DstConst,
const Loop *CurLoop, unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const {
LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++ExactSIVapplications;
assert(0 < Level && Level <= CommonLevels && "Level out of range");
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
CurLoop);
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
return false;
// find gcd
APInt G, X, Y;
APInt AM = ConstSrcCoeff->getAPInt();
APInt BM = ConstDstCoeff->getAPInt();
APInt CM = ConstDelta->getAPInt();
unsigned Bits = AM.getBitWidth();
if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactSIVindependence;
++ExactSIVsuccesses;
return true;
}
LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
// since SCEV construction normalizes, LM = 0
APInt UM(Bits, 1, true);
bool UMValid = false;
// UM is perhaps unavailable, let's check
if (const SCEVConstant *CUB =
collectConstantUpperBound(CurLoop, Delta->getType())) {
UM = CUB->getAPInt();
LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n");
UMValid = true;
}
APInt TU(APInt::getSignedMaxValue(Bits));
APInt TL(APInt::getSignedMinValue(Bits));
APInt TC = CM.sdiv(G);
APInt TX = X * TC;
APInt TY = Y * TC;
LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
SmallVector<APInt, 2> TLVec, TUVec;
APInt TB = BM.sdiv(G);
if (TB.sgt(0)) {
TLVec.push_back(ceilingOfQuotient(-TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
// New bound check - modification to Banerjee's e3 check
if (UMValid) {
TUVec.push_back(floorOfQuotient(UM - TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
}
} else {
TUVec.push_back(floorOfQuotient(-TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
// New bound check - modification to Banerjee's e3 check
if (UMValid) {
TLVec.push_back(ceilingOfQuotient(UM - TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
}
}
APInt TA = AM.sdiv(G);
if (TA.sgt(0)) {
if (UMValid) {
TUVec.push_back(floorOfQuotient(UM - TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
}
// New bound check - modification to Banerjee's e3 check
TLVec.push_back(ceilingOfQuotient(-TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
} else {
if (UMValid) {
TLVec.push_back(ceilingOfQuotient(UM - TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
}
// New bound check - modification to Banerjee's e3 check
TUVec.push_back(floorOfQuotient(-TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
}
LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
if (TLVec.empty() || TUVec.empty())
return false;
TL = APIntOps::smax(TLVec.front(), TLVec.back());
TU = APIntOps::smin(TUVec.front(), TUVec.back());
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (TL.sgt(TU)) {
++ExactSIVindependence;
++ExactSIVsuccesses;
return true;
}
// explore directions
unsigned NewDirection = Dependence::DVEntry::NONE;
APInt LowerDistance, UpperDistance;
if (TA.sgt(TB)) {
LowerDistance = (TY - TX) + (TA - TB) * TL;
UpperDistance = (TY - TX) + (TA - TB) * TU;
} else {
LowerDistance = (TY - TX) + (TA - TB) * TU;
UpperDistance = (TY - TX) + (TA - TB) * TL;
}
LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n");
LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n");
APInt Zero(Bits, 0, true);
if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) {
NewDirection |= Dependence::DVEntry::EQ;
++ExactSIVsuccesses;
}
if (LowerDistance.slt(0)) {
NewDirection |= Dependence::DVEntry::GT;
++ExactSIVsuccesses;
}
if (UpperDistance.sgt(0)) {
NewDirection |= Dependence::DVEntry::LT;
++ExactSIVsuccesses;
}
// finished
Result.DV[Level].Direction &= NewDirection;
if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
++ExactSIVindependence;
LLVM_DEBUG(dbgs() << "\t Result = ");
LLVM_DEBUG(Result.dump(dbgs()));
return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
}
// Return true if the divisor evenly divides the dividend.
static
bool isRemainderZero(const SCEVConstant *Dividend,
const SCEVConstant *Divisor) {
const APInt &ConstDividend = Dividend->getAPInt();
const APInt &ConstDivisor = Divisor->getAPInt();
return ConstDividend.srem(ConstDivisor) == 0;
}
// weakZeroSrcSIVtest -
// From the paper, Practical Dependence Testing, Section 4.2.2
//
// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
// where i is an induction variable, c1 and c2 are loop invariant,
// and a is a constant, we can solve it exactly using the
// Weak-Zero SIV test.
//
// Given
//
// c1 = c2 + a*i
//
// we get
//
// (c1 - c2)/a = i
//
// If i is not an integer, there's no dependence.
// If i < 0 or > UB, there's no dependence.
// If i = 0, the direction is >= and peeling the
// 1st iteration will break the dependence.
// If i = UB, the direction is <= and peeling the
// last iteration will break the dependence.
// Otherwise, the direction is *.
//
// Can prove independence. Failing that, we can sometimes refine
// the directions. Can sometimes show that first or last
// iteration carries all the dependences (so worth peeling).
//
// (see also weakZeroDstSIVtest)
//
// Return true if dependence disproved.
bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
const SCEV *SrcConst,
const SCEV *DstConst,
const Loop *CurLoop, unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const {
// For the WeakSIV test, it's possible the loop isn't common to
// the Src and Dst loops. If it isn't, then there's no need to
// record a direction.
LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++WeakZeroSIVapplications;
assert(0 < Level && Level <= MaxLevels && "Level out of range");
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
CurLoop);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
if (Level < CommonLevels) {
Result.DV[Level].Direction &= Dependence::DVEntry::GE;
Result.DV[Level].PeelFirst = true;
++WeakZeroSIVsuccesses;
}
return false; // dependences caused by first iteration
}
const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
if (!ConstCoeff)
return false;
const SCEV *AbsCoeff =
SE->isKnownNegative(ConstCoeff) ?
SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
const SCEV *NewDelta =
SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
// check that Delta/SrcCoeff < iteration count
// really check NewDelta < count*AbsCoeff
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
++WeakZeroSIVindependence;
++WeakZeroSIVsuccesses;
return true;
}
if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
// dependences caused by last iteration
if (Level < CommonLevels) {
Result.DV[Level].Direction &= Dependence::DVEntry::LE;
Result.DV[Level].PeelLast = true;
++WeakZeroSIVsuccesses;
}
return false;
}
}
// check that Delta/SrcCoeff >= 0
// really check that NewDelta >= 0
if (SE->isKnownNegative(NewDelta)) {
// No dependence, newDelta < 0
++WeakZeroSIVindependence;
++WeakZeroSIVsuccesses;
return true;
}
// if SrcCoeff doesn't divide Delta, then no dependence
if (isa<SCEVConstant>(Delta) &&
!isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
++WeakZeroSIVindependence;
++WeakZeroSIVsuccesses;
return true;
}
return false;
}
// weakZeroDstSIVtest -
// From the paper, Practical Dependence Testing, Section 4.2.2
//
// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
// where i is an induction variable, c1 and c2 are loop invariant,
// and a is a constant, we can solve it exactly using the
// Weak-Zero SIV test.
//
// Given
//
// c1 + a*i = c2
//
// we get
//
// i = (c2 - c1)/a
//
// If i is not an integer, there's no dependence.
// If i < 0 or > UB, there's no dependence.
// If i = 0, the direction is <= and peeling the
// 1st iteration will break the dependence.
// If i = UB, the direction is >= and peeling the
// last iteration will break the dependence.
// Otherwise, the direction is *.
//
// Can prove independence. Failing that, we can sometimes refine
// the directions. Can sometimes show that first or last
// iteration carries all the dependences (so worth peeling).
//
// (see also weakZeroSrcSIVtest)
//
// Return true if dependence disproved.
bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
const SCEV *SrcConst,
const SCEV *DstConst,
const Loop *CurLoop, unsigned Level,
FullDependence &Result,
Constraint &NewConstraint) const {
// For the WeakSIV test, it's possible the loop isn't common to the
// Src and Dst loops. If it isn't, then there's no need to record a direction.
LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++WeakZeroSIVapplications;
assert(0 < Level && Level <= SrcLevels && "Level out of range");
Level--;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
CurLoop);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
if (Level < CommonLevels) {
Result.DV[Level].Direction &= Dependence::DVEntry::LE;
Result.DV[Level].PeelFirst = true;
++WeakZeroSIVsuccesses;
}
return false; // dependences caused by first iteration
}
const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
if (!ConstCoeff)
return false;
const SCEV *AbsCoeff =
SE->isKnownNegative(ConstCoeff) ?
SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
const SCEV *NewDelta =
SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
// check that Delta/SrcCoeff < iteration count
// really check NewDelta < count*AbsCoeff
if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
++WeakZeroSIVindependence;
++WeakZeroSIVsuccesses;
return true;
}
if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
// dependences caused by last iteration
if (Level < CommonLevels) {
Result.DV[Level].Direction &= Dependence::DVEntry::GE;
Result.DV[Level].PeelLast = true;
++WeakZeroSIVsuccesses;
}
return false;
}
}
// check that Delta/SrcCoeff >= 0
// really check that NewDelta >= 0
if (SE->isKnownNegative(NewDelta)) {
// No dependence, newDelta < 0
++WeakZeroSIVindependence;
++WeakZeroSIVsuccesses;
return true;
}
// if SrcCoeff doesn't divide Delta, then no dependence
if (isa<SCEVConstant>(Delta) &&
!isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
++WeakZeroSIVindependence;
++WeakZeroSIVsuccesses;
return true;
}
return false;
}
// exactRDIVtest - Tests the RDIV subscript pair for dependence.
// Things of the form [c1 + a*i] and [c2 + b*j],
// where i and j are induction variable, c1 and c2 are loop invariant,
// and a and b are constants.
// Returns true if any possible dependence is disproved.
// Marks the result as inconsistent.
// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
const SCEV *SrcConst, const SCEV *DstConst,
const Loop *SrcLoop, const Loop *DstLoop,
FullDependence &Result) const {
LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
++ExactRDIVapplications;
Result.Consistent = false;
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
return false;
// find gcd
APInt G, X, Y;
APInt AM = ConstSrcCoeff->getAPInt();
APInt BM = ConstDstCoeff->getAPInt();
APInt CM = ConstDelta->getAPInt();
unsigned Bits = AM.getBitWidth();
if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
// gcd doesn't divide Delta, no dependence
++ExactRDIVindependence;
return true;
}
LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
// since SCEV construction seems to normalize, LM = 0
APInt SrcUM(Bits, 1, true);
bool SrcUMvalid = false;
// SrcUM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(SrcLoop, Delta->getType())) {
SrcUM = UpperBound->getAPInt();
LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
SrcUMvalid = true;
}
APInt DstUM(Bits, 1, true);
bool DstUMvalid = false;
// UM is perhaps unavailable, let's check
if (const SCEVConstant *UpperBound =
collectConstantUpperBound(DstLoop, Delta->getType())) {
DstUM = UpperBound->getAPInt();
LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
DstUMvalid = true;
}
APInt TU(APInt::getSignedMaxValue(Bits));
APInt TL(APInt::getSignedMinValue(Bits));
APInt TC = CM.sdiv(G);
APInt TX = X * TC;
APInt TY = Y * TC;
LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
SmallVector<APInt, 2> TLVec, TUVec;
APInt TB = BM.sdiv(G);
if (TB.sgt(0)) {
TLVec.push_back(ceilingOfQuotient(-TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
if (SrcUMvalid) {
TUVec.push_back(floorOfQuotient(SrcUM - TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
}
} else {
TUVec.push_back(floorOfQuotient(-TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
if (SrcUMvalid) {
TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
}
}
APInt TA = AM.sdiv(G);
if (TA.sgt(0)) {
TLVec.push_back(ceilingOfQuotient(-TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
if (DstUMvalid) {
TUVec.push_back(floorOfQuotient(DstUM - TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
}
} else {
TUVec.push_back(floorOfQuotient(-TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
if (DstUMvalid) {
TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA));
LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
}
}
if (TLVec.empty() || TUVec.empty())
return false;
LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
TL = APIntOps::smax(TLVec.front(), TLVec.back());
TU = APIntOps::smin(TUVec.front(), TUVec.back());
LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
if (TL.sgt(TU))
++ExactRDIVindependence;
return TL.sgt(TU);
}
// symbolicRDIVtest -
// In Section 4.5 of the Practical Dependence Testing paper,the authors
// introduce a special case of Banerjee's Inequalities (also called the
// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
// particularly cases with symbolics. Since it's only able to disprove
// dependence (not compute distances or directions), we'll use it as a
// fall back for the other tests.
//
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
// where i and j are induction variables and c1 and c2 are loop invariants,
// we can use the symbolic tests to disprove some dependences, serving as a
// backup for the RDIV test. Note that i and j can be the same variable,
// letting this test serve as a backup for the various SIV tests.
//
// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
// loop bounds for the i and j loops, respectively. So, ...
//
// c1 + a1*i = c2 + a2*j
// a1*i - a2*j = c2 - c1
//
// To test for a dependence, we compute c2 - c1 and make sure it's in the
// range of the maximum and minimum possible values of a1*i - a2*j.
// Considering the signs of a1 and a2, we have 4 possible cases:
//
// 1) If a1 >= 0 and a2 >= 0, then
// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
// -a2*N2 <= c2 - c1 <= a1*N1
//
// 2) If a1 >= 0 and a2 <= 0, then
// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
// 0 <= c2 - c1 <= a1*N1 - a2*N2
//
// 3) If a1 <= 0 and a2 >= 0, then
// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
// a1*N1 - a2*N2 <= c2 - c1 <= 0
//
// 4) If a1 <= 0 and a2 <= 0, then
// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
// a1*N1 <= c2 - c1 <= -a2*N2
//
// return true if dependence disproved
bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
const SCEV *C1, const SCEV *C2,
const Loop *Loop1,
const Loop *Loop2) const {
++SymbolicRDIVapplications;
LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
if (SE->isKnownNonNegative(A1)) {
if (SE->isKnownNonNegative(A2)) {
// A1 >= 0 && A2 >= 0
if (N1) {
// make sure that c2 - c1 <= a1*N1
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
++SymbolicRDIVindependence;
return true;
}
}
if (N2) {
// make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
++SymbolicRDIVindependence;
return true;
}
}
}
else if (SE->isKnownNonPositive(A2)) {
// a1 >= 0 && a2 <= 0
if (N1 && N2) {
// make sure that c2 - c1 <= a1*N1 - a2*N2
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
++SymbolicRDIVindependence;
return true;
}
}
// make sure that 0 <= c2 - c1
if (SE->isKnownNegative(C2_C1)) {
++SymbolicRDIVindependence;
return true;
}
}
}
else if (SE->isKnownNonPositive(A1)) {
if (SE->isKnownNonNegative(A2)) {
// a1 <= 0 && a2 >= 0
if (N1 && N2) {
// make sure that a1*N1 - a2*N2 <= c2 - c1
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
++SymbolicRDIVindependence;
return true;
}
}
// make sure that c2 - c1 <= 0
if (SE->isKnownPositive(C2_C1)) {
++SymbolicRDIVindependence;
return true;
}
}
else if (SE->isKnownNonPositive(A2)) {
// a1 <= 0 && a2 <= 0
if (N1) {
// make sure that a1*N1 <= c2 - c1
const SCEV *A1N1 = SE->getMulExpr(A1, N1);
LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
++SymbolicRDIVindependence;
return true;
}
}
if (N2) {
// make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
const SCEV *A2N2 = SE->getMulExpr(A2, N2);
LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
++SymbolicRDIVindependence;
return true;
}
}
}
}
return false;
}
// testSIV -
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
// a2 are constant, we attack it with an SIV test. While they can all be
// solved with the Exact SIV test, it's worthwhile to use simpler tests when
// they apply; they're cheaper and sometimes more precise.
//
// Return true if dependence disproved.
bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
FullDependence &Result, Constraint &NewConstraint,
const SCEV *&SplitIter) const {
LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
if (SrcAddRec && DstAddRec) {
const SCEV *SrcConst = SrcAddRec->getStart();
const SCEV *DstConst = DstAddRec->getStart();
const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
const Loop *CurLoop = SrcAddRec->getLoop();
assert(CurLoop == DstAddRec->getLoop() &&
"both loops in SIV should be same");
Level = mapSrcLoop(CurLoop);
bool disproven;
if (SrcCoeff == DstCoeff)
disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
Level, Result, NewConstraint);
else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
Level, Result, NewConstraint, SplitIter);
else
disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
Level, Result, NewConstraint);
return disproven ||
gcdMIVtest(Src, Dst, Result) ||
symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
}
if (SrcAddRec) {
const SCEV *SrcConst = SrcAddRec->getStart();
const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
const SCEV *DstConst = Dst;
const Loop *CurLoop = SrcAddRec->getLoop();
Level = mapSrcLoop(CurLoop);
return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
Level, Result, NewConstraint) ||
gcdMIVtest(Src, Dst, Result);
}
if (DstAddRec) {
const SCEV *DstConst = DstAddRec->getStart();
const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
const SCEV *SrcConst = Src;
const Loop *CurLoop = DstAddRec->getLoop();
Level = mapDstLoop(CurLoop);
return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
CurLoop, Level, Result, NewConstraint) ||
gcdMIVtest(Src, Dst, Result);
}
llvm_unreachable("SIV test expected at least one AddRec");
return false;
}
// testRDIV -
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
// where i and j are induction variables, c1 and c2 are loop invariant,
// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
// It doesn't make sense to talk about distance or direction in this case,
// so there's no point in making special versions of the Strong SIV test or
// the Weak-crossing SIV test.
//
// With minor algebra, this test can also be used for things like
// [c1 + a1*i + a2*j][c2].
//
// Return true if dependence disproved.
bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
FullDependence &Result) const {
// we have 3 possible situations here:
// 1) [a*i + b] and [c*j + d]
// 2) [a*i + c*j + b] and [d]
// 3) [b] and [a*i + c*j + d]
// We need to find what we've got and get organized
const SCEV *SrcConst, *DstConst;
const SCEV *SrcCoeff, *DstCoeff;
const Loop *SrcLoop, *DstLoop;
LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
if (SrcAddRec && DstAddRec) {
SrcConst = SrcAddRec->getStart();
SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
SrcLoop = SrcAddRec->getLoop();
DstConst = DstAddRec->getStart();
DstCoeff = DstAddRec->getStepRecurrence(*SE);
DstLoop = DstAddRec->getLoop();
}
else if (SrcAddRec) {
if (const SCEVAddRecExpr *tmpAddRec =
dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
SrcConst = tmpAddRec->getStart();
SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
SrcLoop = tmpAddRec->getLoop();
DstConst = Dst;
DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
DstLoop = SrcAddRec->getLoop();
}
else
llvm_unreachable("RDIV reached by surprising SCEVs");
}
else if (DstAddRec) {
if (const SCEVAddRecExpr *tmpAddRec =
dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
DstConst = tmpAddRec->getStart();
DstCoeff = tmpAddRec->getStepRecurrence(*SE);
DstLoop = tmpAddRec->getLoop();
SrcConst = Src;
SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
SrcLoop = DstAddRec->getLoop();
}
else
llvm_unreachable("RDIV reached by surprising SCEVs");
}
else
llvm_unreachable("RDIV expected at least one AddRec");
return exactRDIVtest(SrcCoeff, DstCoeff,
SrcConst, DstConst,
SrcLoop, DstLoop,
Result) ||
gcdMIVtest(Src, Dst, Result) ||
symbolicRDIVtest(SrcCoeff, DstCoeff,
SrcConst, DstConst,
SrcLoop, DstLoop);
}
// Tests the single-subscript MIV pair (Src and Dst) for dependence.
// Return true if dependence disproved.
// Can sometimes refine direction vectors.
bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
const SmallBitVector &Loops,
FullDependence &Result) const {
LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
Result.Consistent = false;
return gcdMIVtest(Src, Dst, Result) ||
banerjeeMIVtest(Src, Dst, Loops, Result);
}
// Given a product, e.g., 10*X*Y, returns the first constant operand,
// in this case 10. If there is no constant part, returns NULL.
static
const SCEVConstant *getConstantPart(const SCEV *Expr) {
if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
return Constant;
else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
return Constant;
return nullptr;
}
//===----------------------------------------------------------------------===//
// gcdMIVtest -
// Tests an MIV subscript pair for dependence.
// Returns true if any possible dependence is disproved.
// Marks the result as inconsistent.
// Can sometimes disprove the equal direction for 1 or more loops,
// as discussed in Michael Wolfe's book,
// High Performance Compilers for Parallel Computing, page 235.
//
// We spend some effort (code!) to handle cases like
// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
// but M and N are just loop-invariant variables.
// This should help us handle linearized subscripts;
// also makes this test a useful backup to the various SIV tests.
//
// It occurs to me that the presence of loop-invariant variables
// changes the nature of the test from "greatest common divisor"
// to "a common divisor".
bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
FullDependence &Result) const {
LLVM_DEBUG(dbgs() << "starting gcd\n");
++GCDapplications;
unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
APInt RunningGCD = APInt::getZero(BitWidth);
// Examine Src coefficients.
// Compute running GCD and record source constant.
// Because we're looking for the constant at the end of the chain,
// we can't quit the loop just because the GCD == 1.
const SCEV *Coefficients = Src;
while (const SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(Coefficients)) {
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
const auto *Constant = getConstantPart(Coeff);
if (!Constant)
return false;
APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
Coefficients = AddRec->getStart();
}
const SCEV *SrcConst = Coefficients;
// Examine Dst coefficients.
// Compute running GCD and record destination constant.
// Because we're looking for the constant at the end of the chain,
// we can't quit the loop just because the GCD == 1.
Coefficients = Dst;
while (const SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(Coefficients)) {
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
const auto *Constant = getConstantPart(Coeff);
if (!Constant)
return false;
APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
Coefficients = AddRec->getStart();
}
const SCEV *DstConst = Coefficients;
APInt ExtraGCD = APInt::getZero(BitWidth);
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
// If Delta is a sum of products, we may be able to make further progress.
for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
const SCEV *Operand = Sum->getOperand(Op);
if (isa<SCEVConstant>(Operand)) {
assert(!Constant && "Surprised to find multiple constants");
Constant = cast<SCEVConstant>(Operand);
}
else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
// Search for constant operand to participate in GCD;
// If none found; return false.
const SCEVConstant *ConstOp = getConstantPart(Product);
if (!ConstOp)
return false;
APInt ConstOpValue = ConstOp->getAPInt();
ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
ConstOpValue.abs());
}
else
return false;
}
}
if (!Constant)
return false;
APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
if (ConstDelta == 0)
return false;
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
APInt Remainder = ConstDelta.srem(RunningGCD);
if (Remainder != 0) {
++GCDindependence;
return true;
}
// Try to disprove equal directions.
// For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
// the code above can't disprove the dependence because the GCD = 1.
// So we consider what happen if i = i' and what happens if j = j'.
// If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
// which is infeasible, so we can disallow the = direction for the i level.
// Setting j = j' doesn't help matters, so we end up with a direction vector
// of [<>, *]
//
// Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
// we need to remember that the constant part is 5 and the RunningGCD should
// be initialized to ExtraGCD = 30.
LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
bool Improved = false;
Coefficients = Src;
while (const SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(Coefficients)) {
Coefficients = AddRec->getStart();
const Loop *CurLoop = AddRec->getLoop();
RunningGCD = ExtraGCD;
const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
const SCEV *Inner = Src;
while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
AddRec = cast<SCEVAddRecExpr>(Inner);
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
if (CurLoop == AddRec->getLoop())
; // SrcCoeff == Coeff
else {
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
Constant = getConstantPart(Coeff);
if (!Constant)
return false;
APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
}
Inner = AddRec->getStart();
}
Inner = Dst;
while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
AddRec = cast<SCEVAddRecExpr>(Inner);
const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
if (CurLoop == AddRec->getLoop())
DstCoeff = Coeff;
else {
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
Constant = getConstantPart(Coeff);
if (!Constant)
return false;
APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
}
Inner = AddRec->getStart();
}
Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
// If the coefficient is the product of a constant and other stuff,
// we can use the constant in the GCD computation.
Constant = getConstantPart(Delta);
if (!Constant)
// The difference of the two coefficients might not be a product
// or constant, in which case we give up on this direction.
continue;
APInt ConstCoeff = Constant->getAPInt();
RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
if (RunningGCD != 0) {
Remainder = ConstDelta.srem(RunningGCD);
LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
if (Remainder != 0) {
unsigned Level = mapSrcLoop(CurLoop);
Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
Improved = true;
}
}
}
if (Improved)
++GCDsuccesses;
LLVM_DEBUG(dbgs() << "all done\n");
return false;
}
//===----------------------------------------------------------------------===//
// banerjeeMIVtest -
// Use Banerjee's Inequalities to test an MIV subscript pair.
// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
// Generally follows the discussion in Section 2.5.2 of
//
// Optimizing Supercompilers for Supercomputers
// Michael Wolfe
//
// The inequalities given on page 25 are simplified in that loops are
// normalized so that the lower bound is always 0 and the stride is always 1.
// For example, Wolfe gives
//
// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
//
// where A_k is the coefficient of the kth index in the source subscript,
// B_k is the coefficient of the kth index in the destination subscript,
// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
// index, and N_k is the stride of the kth index. Since all loops are normalized
// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
// equation to
//
// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
// = (A^-_k - B_k)^- (U_k - 1) - B_k
//
// Similar simplifications are possible for the other equations.
//
// When we can't determine the number of iterations for a loop,
// we use NULL as an indicator for the worst case, infinity.
// When computing the upper bound, NULL denotes +inf;
// for the lower bound, NULL denotes -inf.
//
// Return true if dependence disproved.
bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
const SmallBitVector &Loops,
FullDependence &Result) const {
LLVM_DEBUG(dbgs() << "starting Banerjee\n");
++BanerjeeApplications;
LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
const SCEV *A0;
CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
const SCEV *B0;
CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
const SCEV *Delta = SE->getMinusSCEV(B0, A0);
LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
// Compute bounds for all the * directions.
LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
for (unsigned K = 1; K <= MaxLevels; ++K) {
Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
Bound[K].Direction = Dependence::DVEntry::ALL;
Bound[K].DirSet = Dependence::DVEntry::NONE;
findBoundsALL(A, B, Bound, K);
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "\t " << K << '\t');
if (Bound[K].Lower[Dependence::DVEntry::ALL])
LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
else
LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[K].Upper[Dependence::DVEntry::ALL])
LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
else
LLVM_DEBUG(dbgs() << "+inf\n");
#endif
}
// Test the *, *, *, ... case.
bool Disproved = false;
if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
// Explore the direction vector hierarchy.
unsigned DepthExpanded = 0;
unsigned NewDeps = exploreDirections(1, A, B, Bound,
Loops, DepthExpanded, Delta);
if (NewDeps > 0) {
bool Improved = false;
for (unsigned K = 1; K <= CommonLevels; ++K) {
if (Loops[K]) {
unsigned Old = Result.DV[K - 1].Direction;
Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
Improved |= Old != Result.DV[K - 1].Direction;
if (!Result.DV[K - 1].Direction) {
Improved = false;
Disproved = true;
break;
}
}
}
if (Improved)
++BanerjeeSuccesses;
}
else {
++BanerjeeIndependence;
Disproved = true;
}
}
else {
++BanerjeeIndependence;
Disproved = true;
}
delete [] Bound;
delete [] A;
delete [] B;
return Disproved;
}
// Hierarchically expands the direction vector
// search space, combining the directions of discovered dependences
// in the DirSet field of Bound. Returns the number of distinct
// dependences discovered. If the dependence is disproved,
// it will return 0.
unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
CoefficientInfo *B, BoundInfo *Bound,
const SmallBitVector &Loops,
unsigned &DepthExpanded,
const SCEV *Delta) const {
// This algorithm has worst case complexity of O(3^n), where 'n' is the number
// of common loop levels. To avoid excessive compile-time, pessimize all the
// results and immediately return when the number of common levels is beyond
// the given threshold.
if (CommonLevels > MIVMaxLevelThreshold) {
LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
"direction exploration is terminated.\n");
for (unsigned K = 1; K <= CommonLevels; ++K)
if (Loops[K])
Bound[K].DirSet = Dependence::DVEntry::ALL;
return 1;
}
if (Level > CommonLevels) {
// record result
LLVM_DEBUG(dbgs() << "\t[");
for (unsigned K = 1; K <= CommonLevels; ++K) {
if (Loops[K]) {
Bound[K].DirSet |= Bound[K].Direction;
#ifndef NDEBUG
switch (Bound[K].Direction) {
case Dependence::DVEntry::LT:
LLVM_DEBUG(dbgs() << " <");
break;
case Dependence::DVEntry::EQ:
LLVM_DEBUG(dbgs() << " =");
break;
case Dependence::DVEntry::GT:
LLVM_DEBUG(dbgs() << " >");
break;
case Dependence::DVEntry::ALL:
LLVM_DEBUG(dbgs() << " *");
break;
default:
llvm_unreachable("unexpected Bound[K].Direction");
}
#endif
}
}
LLVM_DEBUG(dbgs() << " ]\n");
return 1;
}
if (Loops[Level]) {
if (Level > DepthExpanded) {
DepthExpanded = Level;
// compute bounds for <, =, > at current level
findBoundsLT(A, B, Bound, Level);
findBoundsGT(A, B, Bound, Level);
findBoundsEQ(A, B, Bound, Level);
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
LLVM_DEBUG(dbgs() << "\t <\t");
if (Bound[Level].Lower[Dependence::DVEntry::LT])
LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
<< '\t');
else
LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[Level].Upper[Dependence::DVEntry::LT])
LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
<< '\n');
else
LLVM_DEBUG(dbgs() << "+inf\n");
LLVM_DEBUG(dbgs() << "\t =\t");
if (Bound[Level].Lower[Dependence::DVEntry::EQ])
LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
<< '\t');
else
LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[Level].Upper[Dependence::DVEntry::EQ])
LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
<< '\n');
else
LLVM_DEBUG(dbgs() << "+inf\n");
LLVM_DEBUG(dbgs() << "\t >\t");
if (Bound[Level].Lower[Dependence::DVEntry::GT])
LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
<< '\t');
else
LLVM_DEBUG(dbgs() << "-inf\t");
if (Bound[Level].Upper[Dependence::DVEntry::GT])
LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
<< '\n');
else
LLVM_DEBUG(dbgs() << "+inf\n");
#endif
}
unsigned NewDeps = 0;
// test bounds for <, *, *, ...
if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
NewDeps += exploreDirections(Level + 1, A, B, Bound,
Loops, DepthExpanded, Delta);
// Test bounds for =, *, *, ...
if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
NewDeps += exploreDirections(Level + 1, A, B, Bound,
Loops, DepthExpanded, Delta);
// test bounds for >, *, *, ...
if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
NewDeps += exploreDirections(Level + 1, A, B, Bound,
Loops, DepthExpanded, Delta);
Bound[Level].Direction = Dependence::DVEntry::ALL;
return NewDeps;
}
else
return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
}
// Returns true iff the current bounds are plausible.
bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
BoundInfo *Bound, const SCEV *Delta) const {
Bound[Level].Direction = DirKind;
if (const SCEV *LowerBound = getLowerBound(Bound))
if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
return false;
if (const SCEV *UpperBound = getUpperBound(Bound))
if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
return false;
return true;
}
// Computes the upper and lower bounds for level K
// using the * direction. Records them in Bound.
// Wolfe gives the equations
//
// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
//
// Since we normalize loops, we can simplify these equations to
//
// LB^*_k = (A^-_k - B^+_k)U_k
// UB^*_k = (A^+_k - B^-_k)U_k
//
// We must be careful to handle the case where the upper bound is unknown.
// Note that the lower bound is always <= 0
// and the upper bound is always >= 0.
void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
BoundInfo *Bound, unsigned K) const {
Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
Bound[K].Lower[Dependence::DVEntry::ALL] =
SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
Bound[K].Iterations);
Bound[K].Upper[Dependence::DVEntry::ALL] =
SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
Bound[K].Iterations);
}
else {
// If the difference is 0, we won't need to know the number of iterations.
if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
Bound[K].Lower[Dependence::DVEntry::ALL] =
SE->getZero(A[K].Coeff->getType());
if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
Bound[K].Upper[Dependence::DVEntry::ALL] =
SE->getZero(A[K].Coeff->getType());
}
}
// Computes the upper and lower bounds for level K
// using the = direction. Records them in Bound.
// Wolfe gives the equations
//
// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
//
// Since we normalize loops, we can simplify these equations to
//
// LB^=_k = (A_k - B_k)^- U_k
// UB^=_k = (A_k - B_k)^+ U_k
//
// We must be careful to handle the case where the upper bound is unknown.
// Note that the lower bound is always <= 0
// and the upper bound is always >= 0.
void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
BoundInfo *Bound, unsigned K) const {
Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
const SCEV *NegativePart = getNegativePart(Delta);
Bound[K].Lower[Dependence::DVEntry::EQ] =
SE->getMulExpr(NegativePart, Bound[K].Iterations);
const SCEV *PositivePart = getPositivePart(Delta);
Bound[K].Upper[Dependence::DVEntry::EQ] =
SE->getMulExpr(PositivePart, Bound[K].Iterations);
}
else {
// If the positive/negative part of the difference is 0,
// we won't need to know the number of iterations.
const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
const SCEV *NegativePart = getNegativePart(Delta);
if (NegativePart->isZero())
Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
const SCEV *PositivePart = getPositivePart(Delta);
if (PositivePart->isZero())
Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
}
}
// Computes the upper and lower bounds for level K
// using the < direction. Records them in Bound.
// Wolfe gives the equations
//
// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
//
// Since we normalize loops, we can simplify these equations to
//
// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
//
// We must be careful to handle the case where the upper bound is unknown.
void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
BoundInfo *Bound, unsigned K) const {
Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
const SCEV *Iter_1 = SE->getMinusSCEV(
Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
const SCEV *NegPart =
getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
Bound[K].Lower[Dependence::DVEntry::LT] =
SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
const SCEV *PosPart =
getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
Bound[K].Upper[Dependence::DVEntry::LT] =
SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
}
else {
// If the positive/negative part of the difference is 0,
// we won't need to know the number of iterations.
const SCEV *NegPart =
getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
if (NegPart->isZero())
Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
const SCEV *PosPart =
getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
if (PosPart->isZero())
Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
}
}
// Computes the upper and lower bounds for level K
// using the > direction. Records them in Bound.
// Wolfe gives the equations
//
// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
//
// Since we normalize loops, we can simplify these equations to
//
// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
//
// We must be careful to handle the case where the upper bound is unknown.
void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
BoundInfo *Bound, unsigned K) const {
Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
const SCEV *Iter_1 = SE->getMinusSCEV(
Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
const SCEV *NegPart =
getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
Bound[K].Lower[Dependence::DVEntry::GT] =
SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
const SCEV *PosPart =
getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
Bound[K].Upper[Dependence::DVEntry::GT] =
SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
}
else {
// If the positive/negative part of the difference is 0,
// we won't need to know the number of iterations.
const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
if (NegPart->isZero())
Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
if (PosPart->isZero())
Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
}
}
// X^+ = max(X, 0)
const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
return SE->getSMaxExpr(X, SE->getZero(X->getType()));
}
// X^- = min(X, 0)
const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
return SE->getSMinExpr(X, SE->getZero(X->getType()));
}
// Walks through the subscript,
// collecting each coefficient, the associated loop bounds,
// and recording its positive and negative parts for later use.
DependenceInfo::CoefficientInfo *
DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
const SCEV *&Constant) const {
const SCEV *Zero = SE->getZero(Subscript->getType());
CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
for (unsigned K = 1; K <= MaxLevels; ++K) {
CI[K].Coeff = Zero;
CI[K].PosPart = Zero;
CI[K].NegPart = Zero;
CI[K].Iterations = nullptr;
}
while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
const Loop *L = AddRec->getLoop();
unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
CI[K].Coeff = AddRec->getStepRecurrence(*SE);
CI[K].PosPart = getPositivePart(CI[K].Coeff);
CI[K].NegPart = getNegativePart(CI[K].Coeff);
CI[K].Iterations = collectUpperBound(L, Subscript->getType());
Subscript = AddRec->getStart();
}
Constant = Subscript;
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
for (unsigned K = 1; K <= MaxLevels; ++K) {
LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
LLVM_DEBUG(dbgs() << "\tPos Part = ");
LLVM_DEBUG(dbgs() << *CI[K].PosPart);
LLVM_DEBUG(dbgs() << "\tNeg Part = ");
LLVM_DEBUG(dbgs() << *CI[K].NegPart);
LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
if (CI[K].Iterations)
LLVM_DEBUG(dbgs() << *CI[K].Iterations);
else
LLVM_DEBUG(dbgs() << "+inf");
LLVM_DEBUG(dbgs() << '\n');
}
LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
#endif
return CI;
}
// Looks through all the bounds info and
// computes the lower bound given the current direction settings
// at each level. If the lower bound for any level is -inf,
// the result is -inf.
const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
if (Bound[K].Lower[Bound[K].Direction])
Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
else
Sum = nullptr;
}
return Sum;
}
// Looks through all the bounds info and
// computes the upper bound given the current direction settings
// at each level. If the upper bound at any level is +inf,
// the result is +inf.
const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
if (Bound[K].Upper[Bound[K].Direction])
Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
else
Sum = nullptr;
}
return Sum;
}
//===----------------------------------------------------------------------===//
// Constraint manipulation for Delta test.
// Given a linear SCEV,
// return the coefficient (the step)
// corresponding to the specified loop.
// If there isn't one, return 0.
// For example, given a*i + b*j + c*k, finding the coefficient
// corresponding to the j loop would yield b.
const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,
const Loop *TargetLoop) const {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
return SE->getZero(Expr->getType());
if (AddRec->getLoop() == TargetLoop)
return AddRec->getStepRecurrence(*SE);
return findCoefficient(AddRec->getStart(), TargetLoop);
}
// Given a linear SCEV,
// return the SCEV given by zeroing out the coefficient
// corresponding to the specified loop.
// For example, given a*i + b*j + c*k, zeroing the coefficient
// corresponding to the j loop would yield a*i + c*k.
const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,
const Loop *TargetLoop) const {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
return Expr; // ignore
if (AddRec->getLoop() == TargetLoop)
return AddRec->getStart();
return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
AddRec->getStepRecurrence(*SE),
AddRec->getLoop(),
AddRec->getNoWrapFlags());
}
// Given a linear SCEV Expr,
// return the SCEV given by adding some Value to the
// coefficient corresponding to the specified TargetLoop.
// For example, given a*i + b*j + c*k, adding 1 to the coefficient
// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,
const Loop *TargetLoop,
const SCEV *Value) const {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec) // create a new addRec
return SE->getAddRecExpr(Expr,
Value,
TargetLoop,
SCEV::FlagAnyWrap); // Worst case, with no info.
if (AddRec->getLoop() == TargetLoop) {
const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
if (Sum->isZero())
return AddRec->getStart();
return SE->getAddRecExpr(AddRec->getStart(),
Sum,
AddRec->getLoop(),
AddRec->getNoWrapFlags());
}
2013-06-29 02:44:48 +08:00
if (SE->isLoopInvariant(AddRec, TargetLoop))
return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
return SE->getAddRecExpr(
addToCoefficient(AddRec->getStart(), TargetLoop, Value),
AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
AddRec->getNoWrapFlags());
}
// Review the constraints, looking for opportunities
// to simplify a subscript pair (Src and Dst).
// Return true if some simplification occurs.
// If the simplification isn't exact (that is, if it is conservative
// in terms of dependence), set consistent to false.
// Corresponds to Figure 5 from the paper
//
// Practical Dependence Testing
// Goff, Kennedy, Tseng
// PLDI 1991
bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
SmallBitVector &Loops,
SmallVectorImpl<Constraint> &Constraints,
bool &Consistent) {
bool Result = false;
for (unsigned LI : Loops.set_bits()) {
LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
LLVM_DEBUG(Constraints[LI].dump(dbgs()));
if (Constraints[LI].isDistance())
Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
else if (Constraints[LI].isLine())
Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
else if (Constraints[LI].isPoint())
Result |= propagatePoint(Src, Dst, Constraints[LI]);
}
return Result;
}
// Attempt to propagate a distance
// constraint into a subscript pair (Src and Dst).
// Return true if some simplification occurs.
// If the simplification isn't exact (that is, if it is conservative
// in terms of dependence), set consistent to false.
bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
Constraint &CurConstraint,
bool &Consistent) {
const Loop *CurLoop = CurConstraint.getAssociatedLoop();
LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
const SCEV *A_K = findCoefficient(Src, CurLoop);
if (A_K->isZero())
return false;
const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
Src = SE->getMinusSCEV(Src, DA_K);
Src = zeroCoefficient(Src, CurLoop);
LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
if (!findCoefficient(Dst, CurLoop)->isZero())
Consistent = false;
return true;
}
// Attempt to propagate a line
// constraint into a subscript pair (Src and Dst).
// Return true if some simplification occurs.
// If the simplification isn't exact (that is, if it is conservative
// in terms of dependence), set consistent to false.
bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
Constraint &CurConstraint,
bool &Consistent) {
const Loop *CurLoop = CurConstraint.getAssociatedLoop();
const SCEV *A = CurConstraint.getA();
const SCEV *B = CurConstraint.getB();
const SCEV *C = CurConstraint.getC();
LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C
<< "\n");
LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
if (A->isZero()) {
const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Bconst || !Cconst) return false;
APInt Beta = Bconst->getAPInt();
APInt Charlie = Cconst->getAPInt();
APInt CdivB = Charlie.sdiv(Beta);
assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
const SCEV *AP_K = findCoefficient(Dst, CurLoop);
// Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
Dst = zeroCoefficient(Dst, CurLoop);
if (!findCoefficient(Src, CurLoop)->isZero())
Consistent = false;
}
else if (B->isZero()) {
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Aconst || !Cconst) return false;
APInt Alpha = Aconst->getAPInt();
APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
const SCEV *A_K = findCoefficient(Src, CurLoop);
Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
Src = zeroCoefficient(Src, CurLoop);
if (!findCoefficient(Dst, CurLoop)->isZero())
Consistent = false;
}
else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
if (!Aconst || !Cconst) return false;
APInt Alpha = Aconst->getAPInt();
APInt Charlie = Cconst->getAPInt();
APInt CdivA = Charlie.sdiv(Alpha);
assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
const SCEV *A_K = findCoefficient(Src, CurLoop);
Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
Src = zeroCoefficient(Src, CurLoop);
Dst = addToCoefficient(Dst, CurLoop, A_K);
if (!findCoefficient(Dst, CurLoop)->isZero())
Consistent = false;
}
else {
// paper is incorrect here, or perhaps just misleading
const SCEV *A_K = findCoefficient(Src, CurLoop);
Src = SE->getMulExpr(Src, A);
Dst = SE->getMulExpr(Dst, A);
Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
Src = zeroCoefficient(Src, CurLoop);
Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
if (!findCoefficient(Dst, CurLoop)->isZero())
Consistent = false;
}
LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
return true;
}
// Attempt to propagate a point
// constraint into a subscript pair (Src and Dst).
// Return true if some simplification occurs.
bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
Constraint &CurConstraint) {
const Loop *CurLoop = CurConstraint.getAssociatedLoop();
const SCEV *A_K = findCoefficient(Src, CurLoop);
const SCEV *AP_K = findCoefficient(Dst, CurLoop);
const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
Src = zeroCoefficient(Src, CurLoop);
LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
Dst = zeroCoefficient(Dst, CurLoop);
LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
return true;
}
// Update direction vector entry based on the current constraint.
void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
const Constraint &CurConstraint) const {
LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
LLVM_DEBUG(CurConstraint.dump(dbgs()));
if (CurConstraint.isAny())
; // use defaults
else if (CurConstraint.isDistance()) {
// this one is consistent, the others aren't
Level.Scalar = false;
Level.Distance = CurConstraint.getD();
unsigned NewDirection = Dependence::DVEntry::NONE;
if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
NewDirection = Dependence::DVEntry::EQ;
if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
NewDirection |= Dependence::DVEntry::LT;
if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
NewDirection |= Dependence::DVEntry::GT;
Level.Direction &= NewDirection;
}
else if (CurConstraint.isLine()) {
Level.Scalar = false;
Level.Distance = nullptr;
// direction should be accurate
}
else if (CurConstraint.isPoint()) {
Level.Scalar = false;
Level.Distance = nullptr;
unsigned NewDirection = Dependence::DVEntry::NONE;
if (!isKnownPredicate(CmpInst::ICMP_NE,
CurConstraint.getY(),
CurConstraint.getX()))
// if X may be = Y
NewDirection |= Dependence::DVEntry::EQ;
if (!isKnownPredicate(CmpInst::ICMP_SLE,
CurConstraint.getY(),
CurConstraint.getX()))
// if Y may be > X
NewDirection |= Dependence::DVEntry::LT;
if (!isKnownPredicate(CmpInst::ICMP_SGE,
CurConstraint.getY(),
CurConstraint.getX()))
// if Y may be < X
NewDirection |= Dependence::DVEntry::GT;
Level.Direction &= NewDirection;
}
else
llvm_unreachable("constraint has unexpected kind");
}
/// Check if we can delinearize the subscripts. If the SCEVs representing the
/// source and destination array references are recurrences on a nested loop,
/// this function flattens the nested recurrences into separate recurrences
/// for each loop level.
bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SmallVectorImpl<Subscript> &Pair) {
assert(isLoadOrStore(Src) && "instruction is not load or store");
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
Loop *SrcLoop = LI->getLoopFor(Src->getParent());
Loop *DstLoop = LI->getLoopFor(Dst->getParent());
const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
if (!SrcBase || !DstBase || SrcBase != DstBase)
return false;
SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
SrcSubscripts, DstSubscripts) &&
!tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
SrcSubscripts, DstSubscripts))
return false;
int Size = SrcSubscripts.size();
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
for (int I = 0; I < Size; I++)
dbgs() << *SrcSubscripts[I];
dbgs() << "\nDstSubscripts: ";
for (int I = 0; I < Size; I++)
dbgs() << *DstSubscripts[I];
});
// The delinearization transforms a single-subscript MIV dependence test into
// a multi-subscript SIV dependence test that is easier to compute. So we
// resize Pair to contain as many pairs of subscripts as the delinearization
// has found, and then initialize the pairs following the delinearization.
Pair.resize(Size);
for (int I = 0; I < Size; ++I) {
Pair[I].Src = SrcSubscripts[I];
Pair[I].Dst = DstSubscripts[I];
unifySubscriptType(&Pair[I]);
}
return true;
}
bool DependenceInfo::tryDelinearizeFixedSize(
Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
SmallVectorImpl<const SCEV *> &DstSubscripts) {
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
assert(SrcBase && DstBase && SrcBase == DstBase &&
"expected src and dst scev unknowns to be equal");
// Check the simple case where the array dimensions are fixed size.
auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
if (!SrcGEP || !DstGEP)
return false;
SmallVector<int, 4> SrcSizes, DstSizes;
getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes);
getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes);
// Check that the two size arrays are non-empty and equal in length and
// value.
if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
SrcSizes.size() != DstSizes.size() ||
!std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
SrcSubscripts.clear();
DstSubscripts.clear();
return false;
}
Value *SrcBasePtr = SrcGEP->getOperand(0);
Value *DstBasePtr = DstGEP->getOperand(0);
while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr))
SrcBasePtr = PCast->getOperand(0);
while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr))
DstBasePtr = PCast->getOperand(0);
// Check that for identical base pointers we do not miss index offsets
// that have been added before this GEP is applied.
if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) {
SrcSubscripts.clear();
DstSubscripts.clear();
return false;
}
assert(SrcSubscripts.size() == DstSubscripts.size() &&
SrcSubscripts.size() == SrcSizes.size() + 1 &&
"Expected equal number of entries in the list of sizes and "
"subscripts.");
// In general we cannot safely assume that the subscripts recovered from GEPs
// are in the range of values defined for their corresponding array
// dimensions. For example some C language usage/interpretation make it
// impossible to verify this at compile-time. As such we can only delinearize
// iff the subscripts are positive and are less than the range of the
// dimension.
if (!DisableDelinearizationChecks) {
auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes,
SmallVectorImpl<const SCEV *> &Subscripts,
Value *Ptr) {
size_t SSize = Subscripts.size();
for (size_t I = 1; I < SSize; ++I) {
const SCEV *S = Subscripts[I];
if (!isKnownNonNegative(S, Ptr))
return false;
if (auto *SType = dyn_cast<IntegerType>(S->getType())) {
const SCEV *Range = SE->getConstant(
ConstantInt::get(SType, DimensionSizes[I - 1], false));
if (!isKnownLessThan(S, Range))
return false;
}
}
return true;
};
if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) ||
!AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) {
SrcSubscripts.clear();
DstSubscripts.clear();
return false;
}
}
LLVM_DEBUG({
dbgs() << "Delinearized subscripts of fixed-size array\n"
<< "SrcGEP:" << *SrcGEP << "\n"
<< "DstGEP:" << *DstGEP << "\n";
});
return true;
}
bool DependenceInfo::tryDelinearizeParametricSize(
Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
SmallVectorImpl<const SCEV *> &DstSubscripts) {
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
assert(SrcBase && DstBase && SrcBase == DstBase &&
"expected src and dst scev unknowns to be equal");
const SCEV *ElementSize = SE->getElementSize(Src);
if (ElementSize != SE->getElementSize(Dst))
return false;
const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
return false;
split delinearization pass in 3 steps To compute the dimensions of the array in a unique way, we split the delinearization analysis in three steps: - find parametric terms in all memory access functions - compute the array dimensions from the set of terms - compute the delinearized access functions for each dimension The first step is executed on all the memory access functions such that we gather all the patterns in which an array is accessed. The second step reduces all this information in a unique description of the sizes of the array. The third step is delinearizing each memory access function following the common description of the shape of the array computed in step 2. This rewrite of the delinearization pass also solves a problem we had with the previous implementation: because the previous algorithm was by induction on the structure of the SCEV, it would not correctly recognize the shape of the array when the memory access was not following the nesting of the loops: for example, see polly/test/ScopInfo/multidim_only_ivs_3d_reverse.ll ; void foo(long n, long m, long o, double A[n][m][o]) { ; ; for (long i = 0; i < n; i++) ; for (long j = 0; j < m; j++) ; for (long k = 0; k < o; k++) ; A[i][k][j] = 1.0; Starting with this patch we no longer delinearize access functions that do not contain parameters, for example in test/Analysis/DependenceAnalysis/GCD.ll ;; for (long int i = 0; i < 100; i++) ;; for (long int j = 0; j < 100; j++) { ;; A[2*i - 4*j] = i; ;; *B++ = A[6*i + 8*j]; these accesses will not be delinearized as the upper bound of the loops are constants, and their access functions do not contain SCEVUnknown parameters. llvm-svn: 208232
2014-05-08 02:01:20 +08:00
// First step: collect parametric terms in both array references.
SmallVector<const SCEV *, 4> Terms;
collectParametricTerms(*SE, SrcAR, Terms);
collectParametricTerms(*SE, DstAR, Terms);
split delinearization pass in 3 steps To compute the dimensions of the array in a unique way, we split the delinearization analysis in three steps: - find parametric terms in all memory access functions - compute the array dimensions from the set of terms - compute the delinearized access functions for each dimension The first step is executed on all the memory access functions such that we gather all the patterns in which an array is accessed. The second step reduces all this information in a unique description of the sizes of the array. The third step is delinearizing each memory access function following the common description of the shape of the array computed in step 2. This rewrite of the delinearization pass also solves a problem we had with the previous implementation: because the previous algorithm was by induction on the structure of the SCEV, it would not correctly recognize the shape of the array when the memory access was not following the nesting of the loops: for example, see polly/test/ScopInfo/multidim_only_ivs_3d_reverse.ll ; void foo(long n, long m, long o, double A[n][m][o]) { ; ; for (long i = 0; i < n; i++) ; for (long j = 0; j < m; j++) ; for (long k = 0; k < o; k++) ; A[i][k][j] = 1.0; Starting with this patch we no longer delinearize access functions that do not contain parameters, for example in test/Analysis/DependenceAnalysis/GCD.ll ;; for (long int i = 0; i < 100; i++) ;; for (long int j = 0; j < 100; j++) { ;; A[2*i - 4*j] = i; ;; *B++ = A[6*i + 8*j]; these accesses will not be delinearized as the upper bound of the loops are constants, and their access functions do not contain SCEVUnknown parameters. llvm-svn: 208232
2014-05-08 02:01:20 +08:00
// Second step: find subscript sizes.
SmallVector<const SCEV *, 4> Sizes;
findArrayDimensions(*SE, Terms, Sizes, ElementSize);
split delinearization pass in 3 steps To compute the dimensions of the array in a unique way, we split the delinearization analysis in three steps: - find parametric terms in all memory access functions - compute the array dimensions from the set of terms - compute the delinearized access functions for each dimension The first step is executed on all the memory access functions such that we gather all the patterns in which an array is accessed. The second step reduces all this information in a unique description of the sizes of the array. The third step is delinearizing each memory access function following the common description of the shape of the array computed in step 2. This rewrite of the delinearization pass also solves a problem we had with the previous implementation: because the previous algorithm was by induction on the structure of the SCEV, it would not correctly recognize the shape of the array when the memory access was not following the nesting of the loops: for example, see polly/test/ScopInfo/multidim_only_ivs_3d_reverse.ll ; void foo(long n, long m, long o, double A[n][m][o]) { ; ; for (long i = 0; i < n; i++) ; for (long j = 0; j < m; j++) ; for (long k = 0; k < o; k++) ; A[i][k][j] = 1.0; Starting with this patch we no longer delinearize access functions that do not contain parameters, for example in test/Analysis/DependenceAnalysis/GCD.ll ;; for (long int i = 0; i < 100; i++) ;; for (long int j = 0; j < 100; j++) { ;; A[2*i - 4*j] = i; ;; *B++ = A[6*i + 8*j]; these accesses will not be delinearized as the upper bound of the loops are constants, and their access functions do not contain SCEVUnknown parameters. llvm-svn: 208232
2014-05-08 02:01:20 +08:00
// Third step: compute the access functions for each subscript.
computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
split delinearization pass in 3 steps To compute the dimensions of the array in a unique way, we split the delinearization analysis in three steps: - find parametric terms in all memory access functions - compute the array dimensions from the set of terms - compute the delinearized access functions for each dimension The first step is executed on all the memory access functions such that we gather all the patterns in which an array is accessed. The second step reduces all this information in a unique description of the sizes of the array. The third step is delinearizing each memory access function following the common description of the shape of the array computed in step 2. This rewrite of the delinearization pass also solves a problem we had with the previous implementation: because the previous algorithm was by induction on the structure of the SCEV, it would not correctly recognize the shape of the array when the memory access was not following the nesting of the loops: for example, see polly/test/ScopInfo/multidim_only_ivs_3d_reverse.ll ; void foo(long n, long m, long o, double A[n][m][o]) { ; ; for (long i = 0; i < n; i++) ; for (long j = 0; j < m; j++) ; for (long k = 0; k < o; k++) ; A[i][k][j] = 1.0; Starting with this patch we no longer delinearize access functions that do not contain parameters, for example in test/Analysis/DependenceAnalysis/GCD.ll ;; for (long int i = 0; i < 100; i++) ;; for (long int j = 0; j < 100; j++) { ;; A[2*i - 4*j] = i; ;; *B++ = A[6*i + 8*j]; these accesses will not be delinearized as the upper bound of the loops are constants, and their access functions do not contain SCEVUnknown parameters. llvm-svn: 208232
2014-05-08 02:01:20 +08:00
// Fail when there is only a subscript: that's a linearized access function.
if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
SrcSubscripts.size() != DstSubscripts.size())
return false;
size_t Size = SrcSubscripts.size();
// Statically check that the array bounds are in-range. The first subscript we
// don't have a size for and it cannot overflow into another subscript, so is
// always safe. The others need to be 0 <= subscript[i] < bound, for both src
// and dst.
// FIXME: It may be better to record these sizes and add them as constraints
// to the dependency checks.
[DA] Add an option to control delinearization validity checks Summary: Dependence Analysis performs static checks to confirm validity of delinearization. These checks often fail for 64-bit targets due to type conversions and integer wrapping that prevent simplification of the SCEV expressions. These checks would also fail at compile-time if the lower bound of the loops are compile-time unknown. For example: void foo(int n, int m, int a[][m]) { for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) { a[i][j] = a[i+1][j-2]; } } opt -mem2reg -instcombine -indvars -loop-simplify -loop-rotate -inline -pass-remarks=.* -debug-pass=Arguments -da-permissive-validity-checks=false k3.ll -analyze -da will produce the following by default: da analyze - anti [* *|<]! but will produce the following expected dependence vector if the validity checks are disabled: da analyze - consistent anti [1 -2]! This revision will introduce a debug option that will leave the validity checks in place by default, but allow them to be turned off. New tests are added for cases where it cannot be proven at compile-time that the individual subscripts stay in-bound with respect to a particular dimension of an array. These tests enable the option to provide user guarantee that the subscripts do not over/under-flow into other dimensions, thereby producing more accurate dependence vectors. For prior discussion on this topic, leading to this change, please see the following thread: http://lists.llvm.org/pipermail/llvm-dev/2019-May/132372.html Reviewers: Meinersbur, jdoerfert, kbarton, dmgreen, fhahn Reviewed By: Meinersbur, jdoerfert, dmgreen Subscribers: fhahn, hiraditya, javed.absar, llvm-commits, Whitney, etiotto Tag: LLVM Differential Revision: https://reviews.llvm.org/D62610 llvm-svn: 362711
2019-06-06 23:12:49 +08:00
if (!DisableDelinearizationChecks)
for (size_t I = 1; I < Size; ++I) {
if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
[DA] Add an option to control delinearization validity checks Summary: Dependence Analysis performs static checks to confirm validity of delinearization. These checks often fail for 64-bit targets due to type conversions and integer wrapping that prevent simplification of the SCEV expressions. These checks would also fail at compile-time if the lower bound of the loops are compile-time unknown. For example: void foo(int n, int m, int a[][m]) { for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) { a[i][j] = a[i+1][j-2]; } } opt -mem2reg -instcombine -indvars -loop-simplify -loop-rotate -inline -pass-remarks=.* -debug-pass=Arguments -da-permissive-validity-checks=false k3.ll -analyze -da will produce the following by default: da analyze - anti [* *|<]! but will produce the following expected dependence vector if the validity checks are disabled: da analyze - consistent anti [1 -2]! This revision will introduce a debug option that will leave the validity checks in place by default, but allow them to be turned off. New tests are added for cases where it cannot be proven at compile-time that the individual subscripts stay in-bound with respect to a particular dimension of an array. These tests enable the option to provide user guarantee that the subscripts do not over/under-flow into other dimensions, thereby producing more accurate dependence vectors. For prior discussion on this topic, leading to this change, please see the following thread: http://lists.llvm.org/pipermail/llvm-dev/2019-May/132372.html Reviewers: Meinersbur, jdoerfert, kbarton, dmgreen, fhahn Reviewed By: Meinersbur, jdoerfert, dmgreen Subscribers: fhahn, hiraditya, javed.absar, llvm-commits, Whitney, etiotto Tag: LLVM Differential Revision: https://reviews.llvm.org/D62610 llvm-svn: 362711
2019-06-06 23:12:49 +08:00
return false;
if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
[DA] Add an option to control delinearization validity checks Summary: Dependence Analysis performs static checks to confirm validity of delinearization. These checks often fail for 64-bit targets due to type conversions and integer wrapping that prevent simplification of the SCEV expressions. These checks would also fail at compile-time if the lower bound of the loops are compile-time unknown. For example: void foo(int n, int m, int a[][m]) { for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) { a[i][j] = a[i+1][j-2]; } } opt -mem2reg -instcombine -indvars -loop-simplify -loop-rotate -inline -pass-remarks=.* -debug-pass=Arguments -da-permissive-validity-checks=false k3.ll -analyze -da will produce the following by default: da analyze - anti [* *|<]! but will produce the following expected dependence vector if the validity checks are disabled: da analyze - consistent anti [1 -2]! This revision will introduce a debug option that will leave the validity checks in place by default, but allow them to be turned off. New tests are added for cases where it cannot be proven at compile-time that the individual subscripts stay in-bound with respect to a particular dimension of an array. These tests enable the option to provide user guarantee that the subscripts do not over/under-flow into other dimensions, thereby producing more accurate dependence vectors. For prior discussion on this topic, leading to this change, please see the following thread: http://lists.llvm.org/pipermail/llvm-dev/2019-May/132372.html Reviewers: Meinersbur, jdoerfert, kbarton, dmgreen, fhahn Reviewed By: Meinersbur, jdoerfert, dmgreen Subscribers: fhahn, hiraditya, javed.absar, llvm-commits, Whitney, etiotto Tag: LLVM Differential Revision: https://reviews.llvm.org/D62610 llvm-svn: 362711
2019-06-06 23:12:49 +08:00
return false;
if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
[DA] Add an option to control delinearization validity checks Summary: Dependence Analysis performs static checks to confirm validity of delinearization. These checks often fail for 64-bit targets due to type conversions and integer wrapping that prevent simplification of the SCEV expressions. These checks would also fail at compile-time if the lower bound of the loops are compile-time unknown. For example: void foo(int n, int m, int a[][m]) { for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) { a[i][j] = a[i+1][j-2]; } } opt -mem2reg -instcombine -indvars -loop-simplify -loop-rotate -inline -pass-remarks=.* -debug-pass=Arguments -da-permissive-validity-checks=false k3.ll -analyze -da will produce the following by default: da analyze - anti [* *|<]! but will produce the following expected dependence vector if the validity checks are disabled: da analyze - consistent anti [1 -2]! This revision will introduce a debug option that will leave the validity checks in place by default, but allow them to be turned off. New tests are added for cases where it cannot be proven at compile-time that the individual subscripts stay in-bound with respect to a particular dimension of an array. These tests enable the option to provide user guarantee that the subscripts do not over/under-flow into other dimensions, thereby producing more accurate dependence vectors. For prior discussion on this topic, leading to this change, please see the following thread: http://lists.llvm.org/pipermail/llvm-dev/2019-May/132372.html Reviewers: Meinersbur, jdoerfert, kbarton, dmgreen, fhahn Reviewed By: Meinersbur, jdoerfert, dmgreen Subscribers: fhahn, hiraditya, javed.absar, llvm-commits, Whitney, etiotto Tag: LLVM Differential Revision: https://reviews.llvm.org/D62610 llvm-svn: 362711
2019-06-06 23:12:49 +08:00
return false;
if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
[DA] Add an option to control delinearization validity checks Summary: Dependence Analysis performs static checks to confirm validity of delinearization. These checks often fail for 64-bit targets due to type conversions and integer wrapping that prevent simplification of the SCEV expressions. These checks would also fail at compile-time if the lower bound of the loops are compile-time unknown. For example: void foo(int n, int m, int a[][m]) { for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) { a[i][j] = a[i+1][j-2]; } } opt -mem2reg -instcombine -indvars -loop-simplify -loop-rotate -inline -pass-remarks=.* -debug-pass=Arguments -da-permissive-validity-checks=false k3.ll -analyze -da will produce the following by default: da analyze - anti [* *|<]! but will produce the following expected dependence vector if the validity checks are disabled: da analyze - consistent anti [1 -2]! This revision will introduce a debug option that will leave the validity checks in place by default, but allow them to be turned off. New tests are added for cases where it cannot be proven at compile-time that the individual subscripts stay in-bound with respect to a particular dimension of an array. These tests enable the option to provide user guarantee that the subscripts do not over/under-flow into other dimensions, thereby producing more accurate dependence vectors. For prior discussion on this topic, leading to this change, please see the following thread: http://lists.llvm.org/pipermail/llvm-dev/2019-May/132372.html Reviewers: Meinersbur, jdoerfert, kbarton, dmgreen, fhahn Reviewed By: Meinersbur, jdoerfert, dmgreen Subscribers: fhahn, hiraditya, javed.absar, llvm-commits, Whitney, etiotto Tag: LLVM Differential Revision: https://reviews.llvm.org/D62610 llvm-svn: 362711
2019-06-06 23:12:49 +08:00
return false;
}
return true;
}
//===----------------------------------------------------------------------===//
#ifndef NDEBUG
// For debugging purposes, dump a small bit vector to dbgs().
static void dumpSmallBitVector(SmallBitVector &BV) {
dbgs() << "{";
for (unsigned VI : BV.set_bits()) {
dbgs() << VI;
if (BV.find_next(VI) >= 0)
dbgs() << ' ';
}
dbgs() << "}\n";
}
#endif
bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// Check if the analysis itself has been invalidated.
auto PAC = PA.getChecker<DependenceAnalysis>();
if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
return true;
// Check transitive dependencies.
return Inv.invalidate<AAManager>(F, PA) ||
Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA);
}
// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
// Corresponds to Section 3.1 in the paper
//
// Practical Dependence Testing
// Goff, Kennedy, Tseng
// PLDI 1991
//
// Care is required to keep the routine below, getSplitIteration(),
// up to date with respect to this routine.
std::unique_ptr<Dependence>
DependenceInfo::depends(Instruction *Src, Instruction *Dst,
bool PossiblyLoopIndependent) {
if (Src == Dst)
PossiblyLoopIndependent = false;
if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
// if both instructions don't reference memory, there's no dependence
return nullptr;
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
return std::make_unique<Dependence>(Src, Dst);
}
assert(isLoadOrStore(Src) && "instruction is not load or store");
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(),
MemoryLocation::get(Dst),
MemoryLocation::get(Src))) {
case AliasResult::MayAlias:
case AliasResult::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
return std::make_unique<Dependence>(Src, Dst);
case AliasResult::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
LLVM_DEBUG(dbgs() << "no alias\n");
return nullptr;
case AliasResult::MustAlias:
break; // The underlying objects alias; test accesses for dependence.
}
// establish loop nesting levels
establishNestingLevels(Src, Dst);
LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;
DA: remove uses of GEP, only ask SCEV It's been quite some time the Dependence Analysis (DA) is broken, as it uses the GEP representation to "identify" multi-dimensional arrays. It even wrongly detects multi-dimensional arrays in single nested loops: from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6 ;; for (long int i = 0; i < 50; i++) { ;; A[i][3*i - 6] = i; ;; *B++ = A[i][i]; DA used to detect two subscripts, which makes no sense in the LLVM IR or in C/C++ semantics, as there are no guarantees as in Fortran of subscripts not overlapping into a next array dimension: maximum nesting levels = 1 SrcPtrSCEV = %A DstPtrSCEV = %A using GEPs subscript 0 src = {0,+,1}<nuw><nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} subscript 1 src = {-6,+,3}<nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} Separable = {} Coupled = {1} With the current patch, DA will correctly work on only one dimension: maximum nesting levels = 1 SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body> DstSCEV = {%A,+,404}<%for.body> subscript 0 src = {(-2424 + %A)<nsw>,+,1212}<%for.body> dst = {%A,+,404}<%for.body> class = 1 loops = {1} Separable = {0} Coupled = {} This change removes all uses of GEP from DA, and we now only rely on the SCEV representation. The patch does not turn on -da-delinearize by default, and so the DA analysis will be more conservative in the case of multi-dimensional memory accesses in nested loops. I disabled some interchange tests, as the DA is not able to disambiguate the dependence anymore. To make DA stronger, we may need to compute a bound on the number of iterations based on the access functions and array dimensions. The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to avoid checking for snippets of LLVM IR: this form of checking is very hard to maintain. Instead, we now check for output of the pass that are more meaningful than dozens of lines of LLVM IR. Some tests now require -debug messages and thus only enabled with asserts. Patch written by Sebastian Pop and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D35430 llvm-svn: 326837
2018-03-07 05:55:59 +08:00
unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
const SCEV *DstSCEV = SE->getSCEV(DstPtr);
LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {
// If two pointers have different bases, trying to analyze indexes won't
// work; we can't compare them to each other. This can happen, for example,
// if one is produced by an LCSSA PHI node.
//
// We check this upfront so we don't crash in cases where getMinusSCEV()
// returns a SCEVCouldNotCompute.
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
return std::make_unique<Dependence>(Src, Dst);
}
DA: remove uses of GEP, only ask SCEV It's been quite some time the Dependence Analysis (DA) is broken, as it uses the GEP representation to "identify" multi-dimensional arrays. It even wrongly detects multi-dimensional arrays in single nested loops: from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6 ;; for (long int i = 0; i < 50; i++) { ;; A[i][3*i - 6] = i; ;; *B++ = A[i][i]; DA used to detect two subscripts, which makes no sense in the LLVM IR or in C/C++ semantics, as there are no guarantees as in Fortran of subscripts not overlapping into a next array dimension: maximum nesting levels = 1 SrcPtrSCEV = %A DstPtrSCEV = %A using GEPs subscript 0 src = {0,+,1}<nuw><nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} subscript 1 src = {-6,+,3}<nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} Separable = {} Coupled = {1} With the current patch, DA will correctly work on only one dimension: maximum nesting levels = 1 SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body> DstSCEV = {%A,+,404}<%for.body> subscript 0 src = {(-2424 + %A)<nsw>,+,1212}<%for.body> dst = {%A,+,404}<%for.body> class = 1 loops = {1} Separable = {0} Coupled = {} This change removes all uses of GEP from DA, and we now only rely on the SCEV representation. The patch does not turn on -da-delinearize by default, and so the DA analysis will be more conservative in the case of multi-dimensional memory accesses in nested loops. I disabled some interchange tests, as the DA is not able to disambiguate the dependence anymore. To make DA stronger, we may need to compute a bound on the number of iterations based on the access functions and array dimensions. The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to avoid checking for snippets of LLVM IR: this form of checking is very hard to maintain. Instead, we now check for output of the pass that are more meaningful than dozens of lines of LLVM IR. Some tests now require -debug messages and thus only enabled with asserts. Patch written by Sebastian Pop and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D35430 llvm-svn: 326837
2018-03-07 05:55:59 +08:00
Pair[0].Src = SrcSCEV;
Pair[0].Dst = DstSCEV;
DA: remove uses of GEP, only ask SCEV It's been quite some time the Dependence Analysis (DA) is broken, as it uses the GEP representation to "identify" multi-dimensional arrays. It even wrongly detects multi-dimensional arrays in single nested loops: from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6 ;; for (long int i = 0; i < 50; i++) { ;; A[i][3*i - 6] = i; ;; *B++ = A[i][i]; DA used to detect two subscripts, which makes no sense in the LLVM IR or in C/C++ semantics, as there are no guarantees as in Fortran of subscripts not overlapping into a next array dimension: maximum nesting levels = 1 SrcPtrSCEV = %A DstPtrSCEV = %A using GEPs subscript 0 src = {0,+,1}<nuw><nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} subscript 1 src = {-6,+,3}<nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} Separable = {} Coupled = {1} With the current patch, DA will correctly work on only one dimension: maximum nesting levels = 1 SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body> DstSCEV = {%A,+,404}<%for.body> subscript 0 src = {(-2424 + %A)<nsw>,+,1212}<%for.body> dst = {%A,+,404}<%for.body> class = 1 loops = {1} Separable = {0} Coupled = {} This change removes all uses of GEP from DA, and we now only rely on the SCEV representation. The patch does not turn on -da-delinearize by default, and so the DA analysis will be more conservative in the case of multi-dimensional memory accesses in nested loops. I disabled some interchange tests, as the DA is not able to disambiguate the dependence anymore. To make DA stronger, we may need to compute a bound on the number of iterations based on the access functions and array dimensions. The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to avoid checking for snippets of LLVM IR: this form of checking is very hard to maintain. Instead, we now check for output of the pass that are more meaningful than dozens of lines of LLVM IR. Some tests now require -debug messages and thus only enabled with asserts. Patch written by Sebastian Pop and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D35430 llvm-svn: 326837
2018-03-07 05:55:59 +08:00
if (Delinearize) {
if (tryDelinearize(Src, Dst, Pair)) {
LLVM_DEBUG(dbgs() << " delinearized\n");
Pairs = Pair.size();
}
}
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
Pair[P].Group.resize(Pairs);
removeMatchingExtensions(&Pair[P]);
Pair[P].Classification =
classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
Pair[P].Loops);
Pair[P].GroupLoops = Pair[P].Loops;
Pair[P].Group.set(P);
LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
LLVM_DEBUG(dbgs() << "\tloops = ");
LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
}
SmallBitVector Separable(Pairs);
SmallBitVector Coupled(Pairs);
// Partition subscripts into separable and minimally-coupled groups
// Algorithm in paper is algorithmically better;
// this may be faster in practice. Check someday.
//
// Here's an example of how it works. Consider this code:
//
// for (i = ...) {
// for (j = ...) {
// for (k = ...) {
// for (l = ...) {
// for (m = ...) {
// A[i][j][k][m] = ...;
// ... = A[0][j][l][i + j];
// }
// }
// }
// }
// }
//
// There are 4 subscripts here:
// 0 [i] and [0]
// 1 [j] and [j]
// 2 [k] and [l]
// 3 [m] and [i + j]
//
// We've already classified each subscript pair as ZIV, SIV, etc.,
// and collected all the loops mentioned by pair P in Pair[P].Loops.
// In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
// and set Pair[P].Group = {P}.
//
// Src Dst Classification Loops GroupLoops Group
// 0 [i] [0] SIV {1} {1} {0}
// 1 [j] [j] SIV {2} {2} {1}
// 2 [k] [l] RDIV {3,4} {3,4} {2}
// 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
//
// For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
// So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
//
// We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
// Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
// Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
// so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
// to either Separable or Coupled).
//
// Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
// Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
// so Pair[3].Group = {0, 1, 3} and Done = false.
//
// Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
// Since Done remains true, we add 2 to the set of Separable pairs.
//
// Finally, we consider 3. There's nothing to compare it with,
// so Done remains true and we add it to the Coupled set.
// Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
//
// In the end, we've got 1 separable subscript and 1 coupled group.
for (unsigned SI = 0; SI < Pairs; ++SI) {
if (Pair[SI].Classification == Subscript::NonLinear) {
// ignore these, but collect loops for later
++NonlinearSubscriptPairs;
collectCommonLoops(Pair[SI].Src,
LI->getLoopFor(Src->getParent()),
Pair[SI].Loops);
collectCommonLoops(Pair[SI].Dst,
LI->getLoopFor(Dst->getParent()),
Pair[SI].Loops);
Result.Consistent = false;
2015-03-05 09:25:19 +08:00
} else if (Pair[SI].Classification == Subscript::ZIV) {
// always separable
Separable.set(SI);
}
else {
// SIV, RDIV, or MIV, so check for coupled group
bool Done = true;
for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
SmallBitVector Intersection = Pair[SI].GroupLoops;
Intersection &= Pair[SJ].GroupLoops;
if (Intersection.any()) {
// accumulate set of all the loops in group
Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
// accumulate set of all subscripts in group
Pair[SJ].Group |= Pair[SI].Group;
Done = false;
}
}
if (Done) {
if (Pair[SI].Group.count() == 1) {
Separable.set(SI);
++SeparableSubscriptPairs;
}
else {
Coupled.set(SI);
++CoupledSubscriptPairs;
}
}
}
}
LLVM_DEBUG(dbgs() << " Separable = ");
LLVM_DEBUG(dumpSmallBitVector(Separable));
LLVM_DEBUG(dbgs() << " Coupled = ");
LLVM_DEBUG(dumpSmallBitVector(Coupled));
Constraint NewConstraint;
NewConstraint.setAny(SE);
// test separable subscripts
for (unsigned SI : Separable.set_bits()) {
LLVM_DEBUG(dbgs() << "testing subscript " << SI);
switch (Pair[SI].Classification) {
case Subscript::ZIV:
LLVM_DEBUG(dbgs() << ", ZIV\n");
if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
return nullptr;
break;
case Subscript::SIV: {
LLVM_DEBUG(dbgs() << ", SIV\n");
unsigned Level;
const SCEV *SplitIter = nullptr;
2015-03-05 09:25:19 +08:00
if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
SplitIter))
return nullptr;
break;
}
case Subscript::RDIV:
LLVM_DEBUG(dbgs() << ", RDIV\n");
if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
return nullptr;
break;
case Subscript::MIV:
LLVM_DEBUG(dbgs() << ", MIV\n");
if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
return nullptr;
break;
default:
llvm_unreachable("subscript has unexpected classification");
}
}
if (Coupled.count()) {
// test coupled subscript groups
LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
for (unsigned II = 0; II <= MaxLevels; ++II)
Constraints[II].setAny(SE);
for (unsigned SI : Coupled.set_bits()) {
LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
SmallBitVector Group(Pair[SI].Group);
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
SmallVector<Subscript *, 4> PairsInGroup;
for (unsigned SJ : Group.set_bits()) {
LLVM_DEBUG(dbgs() << SJ << " ");
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
else
Mivs.set(SJ);
PairsInGroup.push_back(&Pair[SJ]);
}
unifySubscriptType(PairsInGroup);
LLVM_DEBUG(dbgs() << "}\n");
while (Sivs.any()) {
bool Changed = false;
for (unsigned SJ : Sivs.set_bits()) {
LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
const SCEV *SplitIter = nullptr;
LLVM_DEBUG(dbgs() << "SIV\n");
2015-03-05 09:25:19 +08:00
if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
SplitIter))
return nullptr;
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
if (Constraints[Level].isEmpty()) {
++DeltaIndependence;
return nullptr;
}
Changed = true;
}
Sivs.reset(SJ);
}
if (Changed) {
// propagate, possibly creating new SIVs and ZIVs
LLVM_DEBUG(dbgs() << " propagating\n");
LLVM_DEBUG(dbgs() << "\tMivs = ");
LLVM_DEBUG(dumpSmallBitVector(Mivs));
for (unsigned SJ : Mivs.set_bits()) {
// SJ is an MIV subscript that's part of the current coupled group
LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
Constraints, Result.Consistent)) {
LLVM_DEBUG(dbgs() << "\t Changed\n");
++DeltaPropagations;
Pair[SJ].Classification =
classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
Pair[SJ].Loops);
switch (Pair[SJ].Classification) {
case Subscript::ZIV:
LLVM_DEBUG(dbgs() << "ZIV\n");
if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
return nullptr;
Mivs.reset(SJ);
break;
case Subscript::SIV:
Sivs.set(SJ);
Mivs.reset(SJ);
break;
case Subscript::RDIV:
case Subscript::MIV:
break;
default:
llvm_unreachable("bad subscript classification");
}
}
}
}
}
// test & propagate remaining RDIVs
for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::RDIV) {
LLVM_DEBUG(dbgs() << "RDIV test\n");
if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
return nullptr;
// I don't yet understand how to propagate RDIV results
Mivs.reset(SJ);
}
}
// test remaining MIVs
// This code is temporary.
// Better to somehow test all remaining subscripts simultaneously.
for (unsigned SJ : Mivs.set_bits()) {
if (Pair[SJ].Classification == Subscript::MIV) {
LLVM_DEBUG(dbgs() << "MIV test\n");
if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
return nullptr;
}
else
llvm_unreachable("expected only MIV subscripts at this point");
}
// update Result.DV from constraint vector
LLVM_DEBUG(dbgs() << " updating\n");
for (unsigned SJ : ConstrainedLevels.set_bits()) {
if (SJ > CommonLevels)
break;
updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
return nullptr;
}
}
}
// Make sure the Scalar flags are set correctly.
SmallBitVector CompleteLoops(MaxLevels + 1);
for (unsigned SI = 0; SI < Pairs; ++SI)
CompleteLoops |= Pair[SI].Loops;
for (unsigned II = 1; II <= CommonLevels; ++II)
if (CompleteLoops[II])
Result.DV[II - 1].Scalar = false;
if (PossiblyLoopIndependent) {
// Make sure the LoopIndependent flag is set correctly.
// All directions must include equal, otherwise no
// loop-independent dependence is possible.
for (unsigned II = 1; II <= CommonLevels; ++II) {
if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
Result.LoopIndependent = false;
break;
}
}
}
else {
// On the other hand, if all directions are equal and there's no
// loop-independent dependence possible, then no dependence exists.
bool AllEqual = true;
for (unsigned II = 1; II <= CommonLevels; ++II) {
if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
AllEqual = false;
break;
}
}
if (AllEqual)
return nullptr;
}
return std::make_unique<FullDependence>(std::move(Result));
}
//===----------------------------------------------------------------------===//
// getSplitIteration -
// Rather than spend rarely-used space recording the splitting iteration
// during the Weak-Crossing SIV test, we re-compute it on demand.
// The re-computation is basically a repeat of the entire dependence test,
// though simplified since we know that the dependence exists.
// It's tedious, since we must go through all propagations, etc.
//
// Care is required to keep this code up to date with respect to the routine
// above, depends().
//
// Generally, the dependence analyzer will be used to build
// a dependence graph for a function (basically a map from instructions
// to dependences). Looking for cycles in the graph shows us loops
// that cannot be trivially vectorized/parallelized.
//
// We can try to improve the situation by examining all the dependences
// that make up the cycle, looking for ones we can break.
// Sometimes, peeling the first or last iteration of a loop will break
// dependences, and we've got flags for those possibilities.
// Sometimes, splitting a loop at some other iteration will do the trick,
// and we've got a flag for that case. Rather than waste the space to
// record the exact iteration (since we rarely know), we provide
// a method that calculates the iteration. It's a drag that it must work
// from scratch, but wonderful in that it's possible.
//
// Here's an example:
//
// for (i = 0; i < 10; i++)
// A[i] = ...
// ... = A[11 - i]
//
// There's a loop-carried flow dependence from the store to the load,
// found by the weak-crossing SIV test. The dependence will have a flag,
// indicating that the dependence can be broken by splitting the loop.
// Calling getSplitIteration will return 5.
// Splitting the loop breaks the dependence, like so:
//
// for (i = 0; i <= 5; i++)
// A[i] = ...
// ... = A[11 - i]
// for (i = 6; i < 10; i++)
// A[i] = ...
// ... = A[11 - i]
//
// breaks the dependence and allows us to vectorize/parallelize
// both loops.
const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
unsigned SplitLevel) {
assert(Dep.isSplitable(SplitLevel) &&
"Dep should be splitable at SplitLevel");
Instruction *Src = Dep.getSrc();
Instruction *Dst = Dep.getDst();
assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
assert(isLoadOrStore(Src));
assert(isLoadOrStore(Dst));
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
assert(underlyingObjectsAlias(
AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst),
MemoryLocation::get(Src)) == AliasResult::MustAlias);
// establish loop nesting levels
establishNestingLevels(Src, Dst);
FullDependence Result(Src, Dst, false, CommonLevels);
DA: remove uses of GEP, only ask SCEV It's been quite some time the Dependence Analysis (DA) is broken, as it uses the GEP representation to "identify" multi-dimensional arrays. It even wrongly detects multi-dimensional arrays in single nested loops: from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6 ;; for (long int i = 0; i < 50; i++) { ;; A[i][3*i - 6] = i; ;; *B++ = A[i][i]; DA used to detect two subscripts, which makes no sense in the LLVM IR or in C/C++ semantics, as there are no guarantees as in Fortran of subscripts not overlapping into a next array dimension: maximum nesting levels = 1 SrcPtrSCEV = %A DstPtrSCEV = %A using GEPs subscript 0 src = {0,+,1}<nuw><nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} subscript 1 src = {-6,+,3}<nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} Separable = {} Coupled = {1} With the current patch, DA will correctly work on only one dimension: maximum nesting levels = 1 SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body> DstSCEV = {%A,+,404}<%for.body> subscript 0 src = {(-2424 + %A)<nsw>,+,1212}<%for.body> dst = {%A,+,404}<%for.body> class = 1 loops = {1} Separable = {0} Coupled = {} This change removes all uses of GEP from DA, and we now only rely on the SCEV representation. The patch does not turn on -da-delinearize by default, and so the DA analysis will be more conservative in the case of multi-dimensional memory accesses in nested loops. I disabled some interchange tests, as the DA is not able to disambiguate the dependence anymore. To make DA stronger, we may need to compute a bound on the number of iterations based on the access functions and array dimensions. The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to avoid checking for snippets of LLVM IR: this form of checking is very hard to maintain. Instead, we now check for output of the pass that are more meaningful than dozens of lines of LLVM IR. Some tests now require -debug messages and thus only enabled with asserts. Patch written by Sebastian Pop and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D35430 llvm-svn: 326837
2018-03-07 05:55:59 +08:00
unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
const SCEV *DstSCEV = SE->getSCEV(DstPtr);
Pair[0].Src = SrcSCEV;
Pair[0].Dst = DstSCEV;
DA: remove uses of GEP, only ask SCEV It's been quite some time the Dependence Analysis (DA) is broken, as it uses the GEP representation to "identify" multi-dimensional arrays. It even wrongly detects multi-dimensional arrays in single nested loops: from test/Analysis/DependenceAnalysis/Coupled.ll, example @couple6 ;; for (long int i = 0; i < 50; i++) { ;; A[i][3*i - 6] = i; ;; *B++ = A[i][i]; DA used to detect two subscripts, which makes no sense in the LLVM IR or in C/C++ semantics, as there are no guarantees as in Fortran of subscripts not overlapping into a next array dimension: maximum nesting levels = 1 SrcPtrSCEV = %A DstPtrSCEV = %A using GEPs subscript 0 src = {0,+,1}<nuw><nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} subscript 1 src = {-6,+,3}<nsw><%for.body> dst = {0,+,1}<nuw><nsw><%for.body> class = 1 loops = {1} Separable = {} Coupled = {1} With the current patch, DA will correctly work on only one dimension: maximum nesting levels = 1 SrcSCEV = {(-2424 + %A)<nsw>,+,1212}<%for.body> DstSCEV = {%A,+,404}<%for.body> subscript 0 src = {(-2424 + %A)<nsw>,+,1212}<%for.body> dst = {%A,+,404}<%for.body> class = 1 loops = {1} Separable = {0} Coupled = {} This change removes all uses of GEP from DA, and we now only rely on the SCEV representation. The patch does not turn on -da-delinearize by default, and so the DA analysis will be more conservative in the case of multi-dimensional memory accesses in nested loops. I disabled some interchange tests, as the DA is not able to disambiguate the dependence anymore. To make DA stronger, we may need to compute a bound on the number of iterations based on the access functions and array dimensions. The patch cleans up all the CHECKs in test/Transforms/LoopInterchange/*.ll to avoid checking for snippets of LLVM IR: this form of checking is very hard to maintain. Instead, we now check for output of the pass that are more meaningful than dozens of lines of LLVM IR. Some tests now require -debug messages and thus only enabled with asserts. Patch written by Sebastian Pop and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D35430 llvm-svn: 326837
2018-03-07 05:55:59 +08:00
if (Delinearize) {
if (tryDelinearize(Src, Dst, Pair)) {
LLVM_DEBUG(dbgs() << " delinearized\n");
Pairs = Pair.size();
}
}
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
Pair[P].Group.resize(Pairs);
removeMatchingExtensions(&Pair[P]);
Pair[P].Classification =
classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
Pair[P].Loops);
Pair[P].GroupLoops = Pair[P].Loops;
Pair[P].Group.set(P);
}
SmallBitVector Separable(Pairs);
SmallBitVector Coupled(Pairs);
// partition subscripts into separable and minimally-coupled groups
for (unsigned SI = 0; SI < Pairs; ++SI) {
if (Pair[SI].Classification == Subscript::NonLinear) {
// ignore these, but collect loops for later
collectCommonLoops(Pair[SI].Src,
LI->getLoopFor(Src->getParent()),
Pair[SI].Loops);
collectCommonLoops(Pair[SI].Dst,
LI->getLoopFor(Dst->getParent()),
Pair[SI].Loops);
Result.Consistent = false;
}
else if (Pair[SI].Classification == Subscript::ZIV)
Separable.set(SI);
else {
// SIV, RDIV, or MIV, so check for coupled group
bool Done = true;
for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
SmallBitVector Intersection = Pair[SI].GroupLoops;
Intersection &= Pair[SJ].GroupLoops;
if (Intersection.any()) {
// accumulate set of all the loops in group
Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
// accumulate set of all subscripts in group
Pair[SJ].Group |= Pair[SI].Group;
Done = false;
}
}
if (Done) {
if (Pair[SI].Group.count() == 1)
Separable.set(SI);
else
Coupled.set(SI);
}
}
}
Constraint NewConstraint;
NewConstraint.setAny(SE);
// test separable subscripts
for (unsigned SI : Separable.set_bits()) {
switch (Pair[SI].Classification) {
case Subscript::SIV: {
unsigned Level;
const SCEV *SplitIter = nullptr;
(void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
Result, NewConstraint, SplitIter);
if (Level == SplitLevel) {
assert(SplitIter != nullptr);
return SplitIter;
}
break;
}
case Subscript::ZIV:
case Subscript::RDIV:
case Subscript::MIV:
break;
default:
llvm_unreachable("subscript has unexpected classification");
}
}
if (Coupled.count()) {
// test coupled subscript groups
SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
for (unsigned II = 0; II <= MaxLevels; ++II)
Constraints[II].setAny(SE);
for (unsigned SI : Coupled.set_bits()) {
SmallBitVector Group(Pair[SI].Group);
SmallBitVector Sivs(Pairs);
SmallBitVector Mivs(Pairs);
SmallBitVector ConstrainedLevels(MaxLevels + 1);
for (unsigned SJ : Group.set_bits()) {
if (Pair[SJ].Classification == Subscript::SIV)
Sivs.set(SJ);
else
Mivs.set(SJ);
}
while (Sivs.any()) {
bool Changed = false;
for (unsigned SJ : Sivs.set_bits()) {
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
const SCEV *SplitIter = nullptr;
(void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
Result, NewConstraint, SplitIter);
if (Level == SplitLevel && SplitIter)
return SplitIter;
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint))
Changed = true;
Sivs.reset(SJ);
}
if (Changed) {
// propagate, possibly creating new SIVs and ZIVs
for (unsigned SJ : Mivs.set_bits()) {
// SJ is an MIV subscript that's part of the current coupled group
if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
Pair[SJ].Loops, Constraints, Result.Consistent)) {
Pair[SJ].Classification =
classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
Pair[SJ].Loops);
switch (Pair[SJ].Classification) {
case Subscript::ZIV:
Mivs.reset(SJ);
break;
case Subscript::SIV:
Sivs.set(SJ);
Mivs.reset(SJ);
break;
case Subscript::RDIV:
case Subscript::MIV:
break;
default:
llvm_unreachable("bad subscript classification");
}
}
}
}
}
}
}
llvm_unreachable("somehow reached end of routine");
return nullptr;
}