forked from OSchip/llvm-project
[MemCpyOpt] Make capture check during call slot optimization more precise
Call slot optimization is currently supposed to be prevented if the call can capture the source pointer. Due to an implementation bug, this check currently doesn't trigger if a bitcast of the source pointer is passed instead. I'm somewhat afraid of the fallout of fixing this bug (due to heavy reliance on call slot optimization in rust), so I'd like to strengthen the capture reasoning a bit first. In particular, I believe that the capture is fine as long as a) the call itself cannot depend on the pointer identity, because neither dest has been captured before/at nor src before the call and b) there is no potential use of the captured pointer before the lifetime of the source alloca ends, either due to lifetime.end or a return from a function. At that point the potentially captured pointer becomes dangling. Differential Revision: https://reviews.llvm.org/D115615
This commit is contained in:
parent
c2e77c9122
commit
487a34ed9d
|
@ -20,6 +20,7 @@
|
|||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/CaptureTracking.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
#include "llvm/Analysis/MemoryLocation.h"
|
||||
|
@ -945,12 +946,56 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Check that src isn't captured by the called function since the
|
||||
// transformation can cause aliasing issues in that case.
|
||||
for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI)
|
||||
if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI))
|
||||
// Check whether src is captured by the called function, in which case there
|
||||
// may be further indirect uses of src.
|
||||
bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
|
||||
return U == cpySrc && !C->doesNotCapture(C->getArgOperandNo(&U));
|
||||
});
|
||||
|
||||
// If src is captured, then check whether there are any potential uses of
|
||||
// src through the captured pointer before the lifetime of src ends, either
|
||||
// due to a lifetime.end or a return from the function.
|
||||
if (SrcIsCaptured) {
|
||||
// Check that dest is not captured before/at the call. We have already
|
||||
// checked that src is not captured before it. If either had been captured,
|
||||
// then the call might be comparing the argument against the captured dest
|
||||
// or src pointer.
|
||||
Value *DestObj = getUnderlyingObject(cpyDest);
|
||||
if (!isIdentifiedFunctionLocal(DestObj) ||
|
||||
PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
|
||||
/* StoreCaptures */ true, C, DT,
|
||||
/* IncludeI */ true))
|
||||
return false;
|
||||
|
||||
MemoryLocation SrcLoc =
|
||||
MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
|
||||
for (Instruction &I :
|
||||
make_range(++C->getIterator(), C->getParent()->end())) {
|
||||
// Lifetime of srcAlloca ends at lifetime.end.
|
||||
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
|
||||
if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
|
||||
II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
|
||||
cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
|
||||
break;
|
||||
}
|
||||
|
||||
// Lifetime of srcAlloca ends at return.
|
||||
if (isa<ReturnInst>(&I))
|
||||
break;
|
||||
|
||||
// Ignore the direct read of src in the load.
|
||||
if (&I == cpyLoad)
|
||||
continue;
|
||||
|
||||
// Check whether this instruction may mod/ref src through the captured
|
||||
// pointer (we have already any direct mod/refs in the loop above).
|
||||
// Also bail if we hit a terminator, as we don't want to scan into other
|
||||
// blocks.
|
||||
if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Since we're changing the parameter to the callsite, we need to make sure
|
||||
// that what would be the new parameter dominates the callsite.
|
||||
if (!DT->dominates(cpyDest, C)) {
|
||||
|
|
|
@ -57,8 +57,7 @@ define void @test_lifetime_end() {
|
|||
; CHECK-NEXT: [[PTR1:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: [[PTR2:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[PTR2]])
|
||||
; CHECK-NEXT: call void @foo(i8* [[PTR2]])
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
|
||||
; CHECK-NEXT: call void @foo(i8* [[PTR1]])
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[PTR2]])
|
||||
; CHECK-NEXT: call void @foo(i8* [[PTR1]])
|
||||
; CHECK-NEXT: ret void
|
||||
|
@ -101,8 +100,7 @@ define void @test_function_end() {
|
|||
; CHECK-LABEL: define {{[^@]+}}@test_function_end() {
|
||||
; CHECK-NEXT: [[PTR1:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: [[PTR2:%.*]] = alloca i8, align 1
|
||||
; CHECK-NEXT: call void @foo(i8* [[PTR2]])
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
|
||||
; CHECK-NEXT: call void @foo(i8* [[PTR1]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%ptr1 = alloca i8
|
||||
|
|
Loading…
Reference in New Issue