[FunctionAttrs] Infer argmemonly.

This patch adds initial argmemonly inference, by checking the underlying
objects of locations returned by MemoryLocation.

I think this should cover most cases, except function calls to other
argmemonly functions.

I'm not sure if there's a reason why we don't infer those yet.

Additional argmemonly can improve codegen in some cases. It also makes
it easier to come up with a C reproducer for 7662d1687b (already fixed,
but I'm trying to see if C/C++ fuzzing could help to uncover similar
issues.)

Compile-time impact:
NewPM-O3: +0.01%
NewPM-ReleaseThinLTO: +0.03%
NewPM-ReleaseLTO+g: +0.05%

https://llvm-compile-time-tracker.com/compare.php?from=067c035012fc061ad6378458774ac2df117283c6&to=fe209d4aab5b593bd62d18c0876732ddcca1614d&stat=instructions

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D121415
This commit is contained in:
Florian Hahn 2022-03-16 10:24:33 +00:00
parent 0aab344104
commit e5822ded56
No known key found for this signature in database
GPG Key ID: EEF712BB5E80EBBA
8 changed files with 94 additions and 58 deletions

View File

@ -69,6 +69,7 @@ using namespace llvm;
#define DEBUG_TYPE "function-attrs"
STATISTIC(NumArgMemOnly, "Number of functions marked argmemonly");
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
@ -135,6 +136,14 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
bool WritesMemory = false;
// Track if the function accesses memory not based on pointer arguments or
// allocas.
bool AccessesNonArgsOrAlloca = false;
// Returns true if the underlying object of Ptr (as computed by
// getUnderlyingObject) is a function argument or an alloca.
auto IsArgumentOrAlloca = [](const Value *Ptr) {
const Value *UO = getUnderlyingObject(Ptr);
return isa<Argument>(UO) || isa<AllocaInst>(UO);
};
for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
@ -167,6 +176,7 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
// If it reads, note it.
if (isRefSet(MRI))
ReadsMemory = true;
AccessesNonArgsOrAlloca = true;
continue;
}
@ -179,12 +189,13 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
MemoryLocation Loc =
MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata());
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
if (isModSet(MRI))
// Writes non-local memory.
WritesMemory = true;
@ -194,24 +205,29 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
}
continue;
} else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
MemoryLocation Loc = MemoryLocation::get(LI);
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(LI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
if (!LI->isVolatile() &&
AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
MemoryLocation Loc = MemoryLocation::get(SI);
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(SI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
if (!SI->isVolatile() &&
AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
} else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) {
// Ignore vaargs on local memory.
MemoryLocation Loc = MemoryLocation::get(VI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
} else {
// If AccessesNonArgsOrAlloca has not been updated above, set it
// conservatively.
AccessesNonArgsOrAlloca |= I.mayReadOrWriteMemory();
}
// Any remaining instructions need to be taken seriously! Check if they
@ -224,14 +240,17 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
ReadsMemory |= I.mayReadFromMemory();
}
if (WritesMemory) {
if (!ReadsMemory)
return FMRB_OnlyWritesMemory;
else
return FMRB_UnknownModRefBehavior;
}
if (!WritesMemory && !ReadsMemory)
return FMRB_DoesNotAccessMemory;
return ReadsMemory ? FMRB_OnlyReadsMemory : FMRB_DoesNotAccessMemory;
FunctionModRefBehavior Result = FunctionModRefBehavior(FMRL_Anywhere);
if (!AccessesNonArgsOrAlloca)
Result = FunctionModRefBehavior(FMRL_ArgumentPointees);
if (WritesMemory)
Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Mod));
if (ReadsMemory)
Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Ref));
return Result;
}
FunctionModRefBehavior llvm::computeFunctionBodyMemoryAccess(Function &F,
@ -247,32 +266,48 @@ static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
bool WritesMemory = false;
// Check if all functions only access memory through their arguments.
bool ArgMemOnly = true;
for (Function *F : SCCNodes) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
// Non-exact function definitions may not be selected at link time, and an
// alternative version that writes to memory may be selected. See the
// comment on GlobalValue::isDefinitionExact for more details.
FunctionModRefBehavior FMRB =
checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
if (isModAndRefSet(createModRefInfo(FMRB)))
return;
if (FMRB == FMRB_DoesNotAccessMemory)
continue;
ReadsMemory |= AliasAnalysis::onlyReadsMemory(FMRB);
WritesMemory |= AliasAnalysis::onlyWritesMemory(FMRB);
ModRefInfo MR = createModRefInfo(FMRB);
ReadsMemory |= isRefSet(MR);
WritesMemory |= isModSet(MR);
ArgMemOnly &= AliasAnalysis::onlyAccessesArgPointees(FMRB);
// Reached a point where none of readnone, readonly, writeonly or argmemonly
// can be inferred. Exit.
if (ReadsMemory && WritesMemory && !ArgMemOnly)
return;
}
// If the SCC contains both functions that read and functions that write, then
// we cannot add readonly attributes.
if (ReadsMemory && WritesMemory)
return;
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
assert((!ReadsMemory || !WritesMemory || ArgMemOnly) &&
"no memory attributes can be added for this SCC, should have exited "
"earlier");
// Success! Functions in this SCC do not access memory, only read memory,
// only write memory, or only access memory through their arguments. Give them
// the appropriate attribute.
for (Function *F : SCCNodes) {
// If possible add argmemonly attribute to F, if it accesses memory.
if (ArgMemOnly && !F->onlyAccessesArgMemory() &&
(ReadsMemory || WritesMemory)) {
NumArgMemOnly++;
F->addFnAttr(Attribute::ArgMemOnly);
Changed.insert(F);
}
// The SCC contains functions both writing and reading from memory. We
// cannot add readonly or writeonly attributes.
if (ReadsMemory && WritesMemory)
continue;
if (F->doesNotAccessMemory())
// Already perfect!
continue;

View File

@ -73,12 +73,12 @@ declare void @callee(i32* %p) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind
; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn }
; CHECK: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn writeonly }
; CHECK: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly }
; CHECK: attributes #2 = { nofree nounwind readonly }
; CHECK: attributes #3 = { nounwind }
; CHECK: attributes #4 = { mustprogress nofree nosync nounwind readnone willreturn }
; CHECK: attributes #5 = { mustprogress nofree nosync nounwind willreturn }
; CHECK: attributes #6 = { mustprogress nofree norecurse nosync nounwind willreturn }
; CHECK: attributes #5 = { argmemonly mustprogress nofree nosync nounwind willreturn }
; CHECK: attributes #6 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn }
; CHECK: attributes #7 = { argmemonly nofree nounwind willreturn }
; Root note.

View File

@ -7,13 +7,13 @@
; GCN: %mul.i = fmul float %load, 1.500000e+01
; UNSAFE: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" }
; UNSAFE: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
; UNSAFE: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
; NOINFS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" }
; NOINFS: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
; NOINFS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
; NONANS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" }
; NONANS: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
; NONANS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
define float @foo(float %x) #0 {
entry:

View File

@ -14,7 +14,7 @@ entry:
}
define i32 @test_only_read_arg(i32* %ptr) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
; CHECK-LABEL: @test_only_read_arg(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[PTR:%.*]], align 4
@ -52,7 +52,7 @@ entry:
}
define void @test_only_write_arg(i32* %ptr) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly
; CHECK-LABEL: @test_only_write_arg(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4
@ -91,7 +91,7 @@ entry:
declare i32 @fn_readnone() readnone
define void @test_call_readnone(i32* %ptr) {
; CHECK: Function Attrs: writeonly
; CHECK: Function Attrs: argmemonly writeonly
; CHECK-LABEL: @test_call_readnone(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = call i32 @fn_readnone()
@ -118,7 +118,7 @@ entry:
}
define i32 @test_call_fn_where_argmemonly_can_be_inferred(i32* %ptr) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
; CHECK-LABEL: @test_call_fn_where_argmemonly_can_be_inferred(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = call i32 @test_only_read_arg(i32* [[PTR:%.*]])
@ -130,7 +130,7 @@ entry:
}
define void @test_memcpy_argonly(i8* %dst, i8* %src) {
; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn
; CHECK: Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
; CHECK-LABEL: @test_memcpy_argonly(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST:%.*]], i8* [[SRC:%.*]], i64 32, i1 false)
@ -174,7 +174,7 @@ entry:
}
define i32 @test_read_arg_access_alloca(i32* %ptr) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
; CHECK-LABEL: @test_read_arg_access_alloca(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4

View File

@ -21,4 +21,4 @@ entry:
}
; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone ssp willreturn uwtable }
; CHECK: attributes #1 = { mustprogress nofree norecurse nounwind ssp willreturn uwtable }
; CHECK: attributes #1 = { argmemonly mustprogress nofree norecurse nounwind ssp willreturn uwtable }

View File

@ -36,7 +36,7 @@ entry:
declare void @free(i8* nocapture) local_unnamed_addr #2
define i32 @_Z4foo3Pi(i32* nocapture readonly %a) local_unnamed_addr #3 {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable
; CHECK-LABEL: @_Z4foo3Pi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4

View File

@ -49,7 +49,7 @@ define i32 @test4(i32 %a, i32 %b) {
; negative case - explicit sync
define void @test5(i8* %p) {
; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn
; CHECK-LABEL: @test5(
; CHECK-NEXT: store atomic i8 0, i8* [[P:%.*]] seq_cst, align 1
; CHECK-NEXT: ret void
@ -60,7 +60,7 @@ define void @test5(i8* %p) {
; negative case - explicit sync
define i8 @test6(i8* %p) {
; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[V:%.*]] = load atomic i8, i8* [[P:%.*]] seq_cst, align 1
; CHECK-NEXT: ret i8 [[V]]
@ -104,7 +104,7 @@ define void @test9(i8* %p) {
; atomic load with monotonic ordering
define i32 @load_monotonic(i32* nocapture readonly %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK-LABEL: @load_monotonic(
; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0:%.*]] monotonic, align 4
; CHECK-NEXT: ret i32 [[TMP2]]
@ -115,7 +115,7 @@ define i32 @load_monotonic(i32* nocapture readonly %0) norecurse nounwind uwtabl
; atomic store with monotonic ordering.
define void @store_monotonic(i32* nocapture %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK-LABEL: @store_monotonic(
; CHECK-NEXT: store atomic i32 10, i32* [[TMP0:%.*]] monotonic, align 4
; CHECK-NEXT: ret void
@ -127,7 +127,7 @@ define void @store_monotonic(i32* nocapture %0) norecurse nounwind uwtable {
; negative, should not deduce nosync
; atomic load with acquire ordering.
define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK-LABEL: @load_acquire(
; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0:%.*]] acquire, align 4
; CHECK-NEXT: ret i32 [[TMP2]]
@ -137,7 +137,7 @@ define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable
}
define i32 @load_unordered(i32* nocapture readonly %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable
; CHECK-LABEL: @load_unordered(
; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0:%.*]] unordered, align 4
; CHECK-NEXT: ret i32 [[TMP2]]
@ -148,7 +148,7 @@ define i32 @load_unordered(i32* nocapture readonly %0) norecurse nounwind uwtabl
; atomic store with unordered ordering.
define void @store_unordered(i32* nocapture %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly uwtable
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly uwtable
; CHECK-LABEL: @store_unordered(
; CHECK-NEXT: store atomic i32 10, i32* [[TMP0:%.*]] unordered, align 4
; CHECK-NEXT: ret void
@ -161,7 +161,7 @@ define void @store_unordered(i32* nocapture %0) norecurse nounwind uwtable {
; negative, should not deduce nosync
; atomic load with release ordering
define void @load_release(i32* nocapture %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: nofree norecurse nounwind uwtable
; CHECK: Function Attrs: argmemonly nofree norecurse nounwind uwtable
; CHECK-LABEL: @load_release(
; CHECK-NEXT: store atomic volatile i32 10, i32* [[TMP0:%.*]] release, align 4
; CHECK-NEXT: ret void
@ -172,7 +172,7 @@ define void @load_release(i32* nocapture %0) norecurse nounwind uwtable {
; negative volatile, relaxed atomic
define void @load_volatile_release(i32* nocapture %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: nofree norecurse nounwind uwtable
; CHECK: Function Attrs: argmemonly nofree norecurse nounwind uwtable
; CHECK-LABEL: @load_volatile_release(
; CHECK-NEXT: store atomic volatile i32 10, i32* [[TMP0:%.*]] release, align 4
; CHECK-NEXT: ret void
@ -183,7 +183,7 @@ define void @load_volatile_release(i32* nocapture %0) norecurse nounwind uwtable
; volatile store.
define void @volatile_store(i32* %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: nofree norecurse nounwind uwtable
; CHECK: Function Attrs: argmemonly nofree norecurse nounwind uwtable
; CHECK-LABEL: @volatile_store(
; CHECK-NEXT: store volatile i32 14, i32* [[TMP0:%.*]], align 4
; CHECK-NEXT: ret void
@ -195,7 +195,7 @@ define void @volatile_store(i32* %0) norecurse nounwind uwtable {
; negative, should not deduce nosync
; volatile load.
define i32 @volatile_load(i32* %0) norecurse nounwind uwtable {
; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable
; CHECK-LABEL: @volatile_load(
; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP0:%.*]], align 4
; CHECK-NEXT: ret i32 [[TMP2]]
@ -237,7 +237,7 @@ declare void @llvm.memset(i8* %dest, i8 %val, i32 %len, i1 %isvolatile)
; negative, checking volatile intrinsics.
define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) {
; CHECK: Function Attrs: mustprogress nofree nounwind willreturn
; CHECK: Function Attrs: argmemonly mustprogress nofree nounwind willreturn
; CHECK-LABEL: @memcpy_volatile(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1:%.*]], i8* [[PTR2:%.*]], i32 8, i1 true)
; CHECK-NEXT: ret i32 4
@ -248,7 +248,7 @@ define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) {
; positive, non-volatile intrinsic.
define i32 @memset_non_volatile(i8* %ptr1, i8 %val) {
; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly
; CHECK: Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn writeonly
; CHECK-LABEL: @memset_non_volatile(
; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[PTR1:%.*]], i8 [[VAL:%.*]], i32 8, i1 false)
; CHECK-NEXT: ret i32 4

View File

@ -16,7 +16,8 @@ entry:
ret void
}
; CHECK: 1 function-attrs - Number of arguments marked nocapture
; CHECK: 1 function-attrs - Number of functions marked argmemonly
; CHECK-NEXT: 1 function-attrs - Number of arguments marked nocapture
; CHECK-NEXT: 1 function-attrs - Number of functions marked as nofree
; CHECK-NEXT: 2 function-attrs - Number of functions marked as norecurse
; CHECK-NEXT: 2 function-attrs - Number of functions marked as nosync