forked from OSchip/llvm-project
[GlobalsAA] Teach GlobalsAA about nocapture
Arguments to function calls marked "nocapture" can be marked as non-escaping. However, nocapture is defined in terms of the lifetime of the callee, and if the callee can directly or indirectly recurse to the caller, the semantics of nocapture are invalid. Therefore, we eagerly discover which SCC each function belongs to, and later can check if callee and caller of a callsite belong to the same SCC, in which case there could be recursion. This means that we can't be so optimistic in getModRefInfo(ImmutableCallSite) - previously we assumed all call arguments never aliased with an escaping global. Now we need to check, because a global could now be passed as an argument but still not escape. This also solves a related conformance problem: MemCpyOptimizer can turn non-escaping stores of globals into calls to intrinsics like llvm.memcpy/llvm.memset. This confuses GlobalsAA, which knows the global can't escape and so returns NoModRef when queried, when obviously a memcpy/memset call does indeed reference and modify its arguments. This fixes PR24800, PR24801, and PR24802. llvm-svn: 248576
This commit is contained in:
parent
eeb22f8251
commit
eb46641c28
|
@ -50,6 +50,11 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
|
|||
/// For each function, keep track of what globals are modified or read.
|
||||
DenseMap<const Function *, FunctionInfo> FunctionInfos;
|
||||
|
||||
/// A map of functions to SCC. The SCCs are described by a simple integer
|
||||
/// ID that is only useful for comparing for equality (are two functions
|
||||
/// in the same SCC or not?)
|
||||
DenseMap<const Function *, unsigned> FunctionToSCCMap;
|
||||
|
||||
/// Handle to clear this analysis on deletion of values.
|
||||
struct DeletionCallbackHandle final : CallbackVH {
|
||||
GlobalsAAResult *GAR;
|
||||
|
@ -103,8 +108,11 @@ private:
|
|||
SmallPtrSetImpl<Function *> *Writers = nullptr,
|
||||
GlobalValue *OkayStoreDest = nullptr);
|
||||
bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
|
||||
|
||||
void CollectSCCMembership(CallGraph &CG);
|
||||
|
||||
bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
|
||||
ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS,
|
||||
const GlobalValue *GV);
|
||||
};
|
||||
|
||||
/// Analysis pass providing a never-invalidated alias analysis result.
|
||||
|
|
|
@ -358,6 +358,21 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
|
|||
if (isFreeCall(I, &TLI)) {
|
||||
if (Writers)
|
||||
Writers->insert(CS->getParent()->getParent());
|
||||
} else if (CS.doesNotCapture(CS.getArgumentNo(&U))) {
|
||||
Function *ParentF = CS->getParent()->getParent();
|
||||
// A nocapture argument may be read from or written to, but does not
|
||||
// escape unless the call can somehow recurse.
|
||||
//
|
||||
// nocapture "indicates that the callee does not make any copies of
|
||||
// the pointer that outlive itself". Therefore if we directly or
|
||||
// indirectly recurse, we must treat the pointer as escaping.
|
||||
if (FunctionToSCCMap[ParentF] ==
|
||||
FunctionToSCCMap[CS.getCalledFunction()])
|
||||
return true;
|
||||
if (Readers)
|
||||
Readers->insert(ParentF);
|
||||
if (Writers)
|
||||
Writers->insert(ParentF);
|
||||
} else {
|
||||
return true; // Argument of an unknown call.
|
||||
}
|
||||
|
@ -439,6 +454,21 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Record, for every function that has a node in \p CG, the ID of the
/// strongly connected component that contains it.
///
/// The IDs stored in FunctionToSCCMap are plain counters; they are only
/// meaningful when compared for equality (same ID == same SCC).
void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
  // The SCC iterator walks the call graph bottom-up, i.e. callees are seen
  // before their callers (leaf-first).
  unsigned NextID = 0;
  scc_iterator<CallGraph *> It = scc_begin(&CG);
  while (!It.isAtEnd()) {
    const std::vector<CallGraphNode *> &Members = *It;
    assert(!Members.empty() && "SCC with no functions?");

    for (CallGraphNode *Node : Members) {
      // Call graph nodes without an associated function get no map entry.
      Function *F = Node->getFunction();
      if (!F)
        continue;
      FunctionToSCCMap[F] = NextID;
    }

    // Every SCC visited consumes one ID, whether or not it held a function.
    ++NextID;
    ++It;
  }
}
|
||||
|
||||
/// AnalyzeCallGraph - At this point, we know the functions where globals are
|
||||
/// immediately stored to and read from. Propagate this information up the call
|
||||
/// graph to all callers and compute the mod/ref info for all memory for each
|
||||
|
@ -765,6 +795,32 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
|
|||
return AAResultBase::alias(LocA, LocB);
|
||||
}
|
||||
|
||||
/// Compute how the call \p CS may access \p GV through its arguments.
///
/// \param CS the call site whose arguments are inspected.
/// \param GV the global being queried.
/// \returns MRI_NoModRef when the call does not access memory, or when every
/// argument's underlying objects are all identified objects and none of them
/// is \p GV. Otherwise returns the conservative answer for the call: MRI_Ref
/// if the call only reads memory, MRI_ModRef if it may also write.
ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
                                                     const GlobalValue *GV) {
  if (CS.doesNotAccessMemory())
    return MRI_NoModRef;
  const ModRefInfo ConservativeResult =
      CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;

  // Iterate through all the arguments to the called function. If any argument
  // is based on GV, return the conservative result.
  for (auto &A : CS.args()) {
    SmallVector<Value*, 4> Objects;
    GetUnderlyingObjects(A, Objects, DL);

    // All objects must be identified; an unidentified object could still be
    // GV in disguise, so we cannot rule out an access. The predicate needs no
    // captured state.
    if (!std::all_of(Objects.begin(), Objects.end(), [](const Value *V) {
          return isIdentifiedObject(V);
        }))
      return ConservativeResult;

    if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
      return ConservativeResult;
  }

  // We identified all objects in the argument list, and none of them were GV.
  return MRI_NoModRef;
}
|
||||
|
||||
ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
|
||||
const MemoryLocation &Loc) {
|
||||
unsigned Known = MRI_ModRef;
|
||||
|
@ -777,7 +833,8 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
|
|||
if (const Function *F = CS.getCalledFunction())
|
||||
if (NonAddressTakenGlobals.count(GV))
|
||||
if (const FunctionInfo *FI = getFunctionInfo(F))
|
||||
Known = FI->getModRefInfoForGlobal(*GV);
|
||||
Known = FI->getModRefInfoForGlobal(*GV) |
|
||||
getModRefInfoForArgument(CS, GV);
|
||||
|
||||
if (Known == MRI_NoModRef)
|
||||
return MRI_NoModRef; // No need to query other mod/ref analyses
|
||||
|
@ -807,6 +864,9 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
|
|||
CallGraph &CG) {
|
||||
GlobalsAAResult Result(M.getDataLayout(), TLI);
|
||||
|
||||
// Discover which functions aren't recursive, to feed into AnalyzeGlobals.
|
||||
Result.CollectSCCMembership(CG);
|
||||
|
||||
// Find non-addr taken globals.
|
||||
Result.AnalyzeGlobals(M);
|
||||
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
; RUN: opt < %s -O1 -S -enable-non-lto-gmr=true | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.10.0"
|
||||
|
||||
@a = internal global [3 x i32] zeroinitializer, align 4
|
||||
@b = common global i32 0, align 4
|
||||
|
||||
; The important thing we're checking for here is the reload of (some element of)
|
||||
; @a after the memset.
|
||||
|
||||
; CHECK-LABEL: @main
|
||||
; CHECK: call void @llvm.memset.p0i8.i64{{.*}} @a
|
||||
; CHECK: store i32 3
|
||||
; CHECK: load i32, i32* getelementptr {{.*}} @a
|
||||
; CHECK: icmp eq i32
|
||||
; CHECK: br i1
|
||||
|
||||
; Test body: zeroes the local %c with llvm.memset, stores 1 into @a[2], then
; runs a loop driven by @b (0, 1, 2) that stores 0 into @a[b]. Afterwards it
; reloads @a[2] and calls @abort if the value is non-zero.
; NOTE(review): the call to @abort references attribute group #3, which is not
; defined in the visible part of this file - confirm against the full test.
define i32 @main() {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
%c = alloca [1 x i32], align 4
|
||||
store i32 0, i32* %retval, align 4
|
||||
%0 = bitcast [1 x i32]* %c to i8*
|
||||
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false)
|
||||
; Seed @a[2] with a non-zero value; the loop below overwrites it with 0.
store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
|
||||
store i32 0, i32* @b, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%1 = load i32, i32* @b, align 4
|
||||
%cmp = icmp slt i32 %1, 3
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%2 = load i32, i32* @b, align 4
|
||||
%idxprom = sext i32 %2 to i64
|
||||
%arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* @a, i64 0, i64 %idxprom
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%3 = load i32, i32* @b, align 4
|
||||
%inc = add nsw i32 %3, 1
|
||||
store i32 %inc, i32* @b, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
; Reload of @a[2] after the zeroing loop; this is the load the test's
; FileCheck expectations require to survive optimization.
%4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
|
||||
%cmp1 = icmp ne i32 %4, 0
|
||||
br i1 %cmp1, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %for.end
|
||||
call void @abort() #3
|
||||
unreachable
|
||||
|
||||
if.end: ; preds = %for.end
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind argmemonly
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
|
||||
|
||||
; Function Attrs: noreturn nounwind
|
||||
declare void @abort() noreturn nounwind
|
|
@ -0,0 +1,47 @@
|
|||
; RUN: opt < %s -globals-aa -aa-eval -print-all-alias-modref-info -S 2>&1 | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.10.0"
|
||||
|
||||
@a = internal global i32 0, align 4
|
||||
@b = internal global i32 0, align 4
|
||||
|
||||
; Helper: forwards %p to an indirect call through %ptr. Only %ptr carries the
; nocapture attribute here; %p does not.
; NOTE(review): the call references attribute group #1, which is not defined
; in the visible part of this file - confirm against the full test.
define void @g(i32* %p, void (i32*)* nocapture %ptr) {
|
||||
entry:
|
||||
tail call void %ptr(i32* %p) #1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Function: f
|
||||
; CHECK: MayAlias: i32* %p, i32* @a
|
||||
; CHECK: MayAlias: i32* %q, i32* @a
|
||||
; Passes @a to @g, whose first parameter is not marked nocapture, and then
; reads through %p and reads/writes through %q. The FileCheck expectations for
; this function require MayAlias between each of %p, %q and @a.
define i32 @f(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
|
||||
entry:
|
||||
tail call void @g(i32* nonnull @a, void (i32*)* %ptr)
|
||||
%arrayidx = getelementptr inbounds i32, i32* %p, i64 0
|
||||
%z1 = load i32, i32* %arrayidx, align 4
|
||||
%z2 = load i32, i32* %q, align 4
|
||||
%add = add nsw i32 %z2, %z1
|
||||
store i32 %add, i32* %q, align 4
|
||||
ret i32 4
|
||||
}
|
||||
|
||||
; Helper: same body as @g, but here both %p and %ptr are marked nocapture.
; NOTE(review): the call references attribute group #1, which is not defined
; in the visible part of this file - confirm against the full test.
define void @g2(i32* nocapture %p, void (i32*)* nocapture %ptr) {
|
||||
entry:
|
||||
tail call void %ptr(i32* %p) #1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Function: f2
|
||||
; CHECK: NoAlias: i32* %p, i32* @b
|
||||
; CHECK: NoAlias: i32* %q, i32* @b
|
||||
; Passes @b to @g2, whose first parameter is marked nocapture, and then reads
; through %p and reads/writes through %q. The FileCheck expectations for this
; function require NoAlias between each of %p, %q and @b.
define i32 @f2(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
|
||||
entry:
|
||||
tail call void @g2(i32* nonnull @b, void (i32*)* %ptr)
|
||||
%arrayidx = getelementptr inbounds i32, i32* %p, i64 0
|
||||
%z1 = load i32, i32* %arrayidx, align 4
|
||||
%z2 = load i32, i32* %q, align 4
|
||||
%add = add nsw i32 %z2, %z1
|
||||
store i32 %add, i32* %q, align 4
|
||||
ret i32 4
|
||||
}
|
Loading…
Reference in New Issue