[GlobalsAA] Teach GlobalsAA about nocapture

Arguments to function calls marked "nocapture" can be marked as
non-escaping. However, nocapture is defined in terms of the lifetime
of the callee, and if the callee can directly or indirectly recurse to
the caller, the semantics of nocapture are invalid.

Therefore, we eagerly discover which SCC each function belongs to,
and later can check if callee and caller of a callsite belong to
the same SCC, in which case there could be recursion.

This means that we can't be so optimistic in
getModRefInfo(ImmutableCallSite) - previously we assumed all call
arguments never aliased with an escaping global. Now we need to check,
because a global could now be passed as an argument but still not
escape.

This also solves a related conformance problem: MemCpyOptimizer can
turn non-escaping stores of globals into calls to intrinsics like
llvm.memcpy/llvm.memset. This confuses GlobalsAA, which knows the
global can't escape and so returns NoModRef when queried, when
obviously a memcpy/memset call does indeed reference and modify its
arguments.

This fixes PR24800, PR24801, and PR24802.

llvm-svn: 248576
This commit is contained in:
James Molloy 2015-09-25 15:39:29 +00:00
parent eeb22f8251
commit eb46641c28
4 changed files with 182 additions and 2 deletions

View File

@ -50,6 +50,11 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
/// For each function, keep track of what globals are modified or read.
DenseMap<const Function *, FunctionInfo> FunctionInfos;
/// A map of functions to SCC. The SCCs are described by a simple integer
/// ID that is only useful for comparing for equality (are two functions
/// in the same SCC or not?)
DenseMap<const Function *, unsigned> FunctionToSCCMap;
/// Handle to clear this analysis on deletion of values.
struct DeletionCallbackHandle final : CallbackVH {
GlobalsAAResult *GAR;
@ -103,8 +108,11 @@ private:
SmallPtrSetImpl<Function *> *Writers = nullptr,
GlobalValue *OkayStoreDest = nullptr);
bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
/// Walk the SCCs of \p CG and record in FunctionToSCCMap, for each call graph
/// node that has a function, the integer ID of the SCC containing it.
void CollectSCCMembership(CallGraph &CG);
bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
/// Mod/ref effect the call \p CS may have on \p GV through its arguments:
/// MRI_NoModRef if the call accesses no memory or if every argument's
/// underlying objects are identified and none of them is \p GV; otherwise
/// MRI_Ref for a read-only call and MRI_ModRef for any other call.
ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS,
const GlobalValue *GV);
};
/// Analysis pass providing a never-invalidated alias analysis result.

View File

@ -358,6 +358,21 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
if (isFreeCall(I, &TLI)) {
if (Writers)
Writers->insert(CS->getParent()->getParent());
} else if (CS.doesNotCapture(CS.getArgumentNo(&U))) {
Function *ParentF = CS->getParent()->getParent();
// A nocapture argument may be read from or written to, but does not
// escape unless the call can somehow recurse.
//
// nocapture "indicates that the callee does not make any copies of
// the pointer that outlive itself". Therefore if we directly or
// indirectly recurse, we must treat the pointer as escaping.
if (FunctionToSCCMap[ParentF] ==
FunctionToSCCMap[CS.getCalledFunction()])
return true;
if (Readers)
Readers->insert(ParentF);
if (Writers)
Writers->insert(ParentF);
} else {
return true; // Argument of an unknown call.
}
@ -439,6 +454,21 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
return true;
}
/// Assign every function in the call graph \p CG the ID of the strongly
/// connected component it belongs to, storing the result in FunctionToSCCMap.
/// IDs are handed out sequentially starting at zero and are only meaningful
/// when compared for equality.
void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
  // The SCC iterator walks the call graph bottom-up, so the components
  // containing callees are numbered before those containing their callers.
  unsigned NextID = 0;
  auto SCCIt = scc_begin(&CG);
  while (!SCCIt.isAtEnd()) {
    const std::vector<CallGraphNode *> &Members = *SCCIt;
    assert(!Members.empty() && "SCC with no functions?");

    for (CallGraphNode *Node : Members) {
      Function *Fn = Node->getFunction();
      // Nodes that do not wrap a function get no entry in the map.
      if (!Fn)
        continue;
      FunctionToSCCMap[Fn] = NextID;
    }

    ++NextID;
    ++SCCIt;
  }
}
/// AnalyzeCallGraph - At this point, we know the functions where globals are
/// immediately stored to and read from. Propagate this information up the call
/// graph to all callers and compute the mod/ref info for all memory for each
@ -765,6 +795,32 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
return AAResultBase::alias(LocA, LocB);
}
/// Determine how the call \p CS may affect the global \p GV via the pointers
/// passed as its arguments.
///
/// \returns MRI_NoModRef when the call accesses no memory, or when the
/// underlying objects of every argument are all identified objects and none of
/// them is \p GV. Otherwise returns MRI_Ref for a call that only reads memory
/// and MRI_ModRef for any other call.
ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
                                                     const GlobalValue *GV) {
  // A call that touches no memory at all cannot affect GV.
  if (CS.doesNotAccessMemory())
    return MRI_NoModRef;

  // The answer given as soon as some argument might be based on GV.
  ModRefInfo Pessimistic = MRI_ModRef;
  if (CS.onlyReadsMemory())
    Pessimistic = MRI_Ref;

  for (auto &Arg : CS.args()) {
    SmallVector<Value *, 4> Underlying;
    GetUnderlyingObjects(Arg, Underlying, DL);

    // Give up on this call if any underlying object is not an identified
    // object (it could then be anything, including GV) or is GV itself.
    for (const Value *Obj : Underlying)
      if (!isIdentifiedObject(Obj) || Obj == GV)
        return Pessimistic;
  }

  // Every argument resolved to identified objects, and none of them was GV.
  return MRI_NoModRef;
}
ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
const MemoryLocation &Loc) {
unsigned Known = MRI_ModRef;
@ -777,7 +833,8 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
Known = FI->getModRefInfoForGlobal(*GV);
Known = FI->getModRefInfoForGlobal(*GV) |
getModRefInfoForArgument(CS, GV);
if (Known == MRI_NoModRef)
return MRI_NoModRef; // No need to query other mod/ref analyses
@ -807,6 +864,9 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
CallGraph &CG) {
GlobalsAAResult Result(M.getDataLayout(), TLI);
// Discover which functions aren't recursive, to feed into AnalyzeGlobals.
Result.CollectSCCMembership(CG);
// Find non-addr taken globals.
Result.AnalyzeGlobals(M);

View File

@ -0,0 +1,65 @@
; RUN: opt < %s -O1 -S -enable-non-lto-gmr=true | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
@a = internal global [3 x i32] zeroinitializer, align 4
@b = common global i32 0, align 4
; The important thing we're checking for here is the reload of (some element of)
; @a after the memset.
; CHECK-LABEL: @main
; CHECK: call void @llvm.memset.p0i8.i64{{.*}} @a
; CHECK: store i32 3
; CHECK: load i32, i32* getelementptr {{.*}} @a
; CHECK: icmp eq i32
; CHECK: br i1
; Test driver: writes 1 to @a[2], then zeroes @a[0], @a[1] and @a[2] one
; element at a time in a loop counted by @b, and finally reloads @a[2] and
; calls @abort if the reloaded value is non-zero.
define i32 @main() {
entry:
%retval = alloca i32, align 4
%c = alloca [1 x i32], align 4
store i32 0, i32* %retval, align 4
%0 = bitcast [1 x i32]* %c to i8*
; Zero-fill the 4-byte local array %c.
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false)
; Seed @a[2] with a non-zero value; the loop below stores 0 over it.
store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
; @b is the loop counter and starts at 0.
store i32 0, i32* @b, align 4
br label %for.cond
; Loop header: keep iterating while @b < 3.
for.cond: ; preds = %for.inc, %entry
%1 = load i32, i32* @b, align 4
%cmp = icmp slt i32 %1, 3
br i1 %cmp, label %for.body, label %for.end
; Loop body: store 0 to @a[@b].
for.body: ; preds = %for.cond
%2 = load i32, i32* @b, align 4
%idxprom = sext i32 %2 to i64
%arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* @a, i64 0, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
br label %for.inc
; Loop latch: increment @b.
for.inc: ; preds = %for.body
%3 = load i32, i32* @b, align 4
%inc = add nsw i32 %3, 1
store i32 %inc, i32* @b, align 4
br label %for.cond
; After the loop: reload @a[2] and branch to the abort path if it is non-zero.
for.end: ; preds = %for.cond
%4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
%cmp1 = icmp ne i32 %4, 0
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.end
; NOTE(review): attribute group #3 is not defined in the visible part of this
; file.
call void @abort() #3
unreachable
if.end: ; preds = %for.end
ret i32 0
}
; Function Attrs: nounwind argmemonly
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
; Function Attrs: noreturn nounwind
declare void @abort() noreturn nounwind

View File

@ -0,0 +1,47 @@
; RUN: opt < %s -globals-aa -aa-eval -print-all-alias-modref-info -S 2>&1 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
@a = internal global i32 0, align 4
@b = internal global i32 0, align 4
; Forwards %p to an indirect call through %ptr. Here %p carries no nocapture
; attribute (contrast with @g2, where it does).
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file.
define void @g(i32* %p, void (i32*)* nocapture %ptr) {
entry:
tail call void %ptr(i32* %p) #1
ret void
}
; CHECK-LABEL: Function: f
; CHECK: MayAlias: i32* %p, i32* @a
; CHECK: MayAlias: i32* %q, i32* @a
; Passes @a to @g, whose first parameter is not nocapture, then loads from %p
; and %q and stores their sum back through %q. The FileCheck expectations
; placed before this function require MayAlias between @a and both %p and %q.
define i32 @f(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
entry:
tail call void @g(i32* nonnull @a, void (i32*)* %ptr)
%arrayidx = getelementptr inbounds i32, i32* %p, i64 0
%z1 = load i32, i32* %arrayidx, align 4
%z2 = load i32, i32* %q, align 4
%add = add nsw i32 %z2, %z1
store i32 %add, i32* %q, align 4
ret i32 4
}
; Same body as @g, forwarding %p to an indirect call through %ptr, but here
; the %p parameter is marked nocapture.
define void @g2(i32* nocapture %p, void (i32*)* nocapture %ptr) {
entry:
tail call void %ptr(i32* %p) #1
ret void
}
; CHECK-LABEL: Function: f2
; CHECK: NoAlias: i32* %p, i32* @b
; CHECK: NoAlias: i32* %q, i32* @b
; Passes @b to @g2, whose first parameter is nocapture, then loads from %p and
; %q and stores their sum back through %q. The FileCheck expectations placed
; before this function require NoAlias between @b and both %p and %q.
define i32 @f2(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
entry:
tail call void @g2(i32* nonnull @b, void (i32*)* %ptr)
%arrayidx = getelementptr inbounds i32, i32* %p, i64 0
%z1 = load i32, i32* %arrayidx, align 4
%z2 = load i32, i32* %q, align 4
%add = add nsw i32 %z2, %z1
store i32 %add, i32* %q, align 4
ret i32 4
}