forked from OSchip/llvm-project
[CodeExtractor] Emit lifetime markers around reloads of outputs
CodeExtractor permits extracting a region of blocks from a function even when values defined within the region are used outside of it. This is typically done by creating an alloca in the original function and reloading the value from the alloca after the call to the extracted function. Wrap that reload in lifetime start/end markers to promote stack coloring. Suggested by Sergei Kachkov! Differential Revision: https://reviews.llvm.org/D56045 llvm-svn: 351621
This commit is contained in:
parent
c089fbbaf2
commit
17d9f14bff
|
@ -880,6 +880,71 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
|
|||
return newFunction;
|
||||
}
|
||||
|
||||
/// Scan the extraction region for lifetime markers which reference inputs.
|
||||
/// Erase these markers. Return the inputs which were referenced.
|
||||
///
|
||||
/// The extraction region is defined by a set of blocks (\p Blocks), and a set
|
||||
/// of allocas which will be moved from the caller function into the extracted
|
||||
/// function (\p SunkAllocas).
|
||||
static SetVector<Value *>
|
||||
eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
|
||||
const SetVector<Value *> &SunkAllocas) {
|
||||
SetVector<Value *> InputObjectsWithLifetime;
|
||||
for (BasicBlock *BB : Blocks) {
|
||||
for (auto It = BB->begin(), End = BB->end(); It != End;) {
|
||||
auto *II = dyn_cast<IntrinsicInst>(&*It);
|
||||
++It;
|
||||
if (!II || !II->isLifetimeStartOrEnd())
|
||||
continue;
|
||||
|
||||
// Get the memory operand of the lifetime marker. If the underlying
|
||||
// object is a sunk alloca, or is otherwise defined in the extraction
|
||||
// region, the lifetime marker must not be erased.
|
||||
Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
|
||||
if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
|
||||
continue;
|
||||
|
||||
InputObjectsWithLifetime.insert(Mem);
|
||||
II->eraseFromParent();
|
||||
}
|
||||
}
|
||||
return InputObjectsWithLifetime;
|
||||
}
|
||||
|
||||
/// Insert lifetime start/end markers surrounding the call to the new function
|
||||
/// for objects defined in the caller.
|
||||
static void insertLifetimeMarkersSurroundingCall(Module *M,
|
||||
ArrayRef<Value *> Objects,
|
||||
CallInst *TheCall) {
|
||||
if (Objects.empty())
|
||||
return;
|
||||
|
||||
LLVMContext &Ctx = M->getContext();
|
||||
auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
|
||||
auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
|
||||
auto StartFn = llvm::Intrinsic::getDeclaration(
|
||||
M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
|
||||
auto EndFn = llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::lifetime_end,
|
||||
Int8PtrTy);
|
||||
Instruction *Term = TheCall->getParent()->getTerminator();
|
||||
for (Value *Mem : Objects) {
|
||||
assert((!isa<Instruction>(Mem) ||
|
||||
cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
|
||||
"Input memory not defined in original function");
|
||||
Value *MemAsI8Ptr = nullptr;
|
||||
if (Mem->getType() == Int8PtrTy)
|
||||
MemAsI8Ptr = Mem;
|
||||
else
|
||||
MemAsI8Ptr =
|
||||
CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
|
||||
|
||||
auto StartMarker = CallInst::Create(StartFn, {NegativeOne, MemAsI8Ptr});
|
||||
StartMarker->insertBefore(TheCall);
|
||||
auto EndMarker = CallInst::Create(EndFn, {NegativeOne, MemAsI8Ptr});
|
||||
EndMarker->insertBefore(Term);
|
||||
}
|
||||
}
|
||||
|
||||
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
|
||||
/// the call instruction, splitting any PHI nodes in the header block as
|
||||
/// necessary.
|
||||
|
@ -1119,6 +1184,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
|
|||
break;
|
||||
}
|
||||
|
||||
// Insert lifetime markers around the reloads of any output values. The
|
||||
// allocas that hold output values are only in use in the codeRepl block.
|
||||
insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, call);
|
||||
|
||||
return call;
|
||||
}
|
||||
|
||||
|
@ -1181,71 +1250,6 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
|
|||
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
|
||||
}
|
||||
|
||||
/// Scan the extraction region for lifetime markers which reference inputs.
|
||||
/// Erase these markers. Return the inputs which were referenced.
|
||||
///
|
||||
/// The extraction region is defined by a set of blocks (\p Blocks), and a set
|
||||
/// of allocas which will be moved from the caller function into the extracted
|
||||
/// function (\p SunkAllocas).
|
||||
static SetVector<Value *>
|
||||
eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
|
||||
const SetVector<Value *> &SunkAllocas) {
|
||||
SetVector<Value *> InputObjectsWithLifetime;
|
||||
for (BasicBlock *BB : Blocks) {
|
||||
for (auto It = BB->begin(), End = BB->end(); It != End;) {
|
||||
auto *II = dyn_cast<IntrinsicInst>(&*It);
|
||||
++It;
|
||||
if (!II || !II->isLifetimeStartOrEnd())
|
||||
continue;
|
||||
|
||||
// Get the memory operand of the lifetime marker. If the underlying
|
||||
// object is a sunk alloca, or is otherwise defined in the extraction
|
||||
// region, the lifetime marker must not be erased.
|
||||
Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
|
||||
if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
|
||||
continue;
|
||||
|
||||
InputObjectsWithLifetime.insert(Mem);
|
||||
II->eraseFromParent();
|
||||
}
|
||||
}
|
||||
return InputObjectsWithLifetime;
|
||||
}
|
||||
|
||||
/// Insert lifetime start/end markers surrounding the call to the new function
|
||||
/// for objects defined in the caller.
|
||||
static void insertLifetimeMarkersSurroundingCall(
|
||||
Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
|
||||
CallInst *TheCall) {
|
||||
if (InputObjectsWithLifetime.empty())
|
||||
return;
|
||||
|
||||
LLVMContext &Ctx = M->getContext();
|
||||
auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
|
||||
auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
|
||||
auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
|
||||
M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
|
||||
auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
|
||||
M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
|
||||
for (Value *Mem : InputObjectsWithLifetime) {
|
||||
assert((!isa<Instruction>(Mem) ||
|
||||
cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
|
||||
"Input memory not defined in original function");
|
||||
Value *MemAsI8Ptr = nullptr;
|
||||
if (Mem->getType() == Int8PtrTy)
|
||||
MemAsI8Ptr = Mem;
|
||||
else
|
||||
MemAsI8Ptr =
|
||||
CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
|
||||
|
||||
auto StartMarker =
|
||||
CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
|
||||
StartMarker->insertBefore(TheCall);
|
||||
auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
|
||||
EndMarker->insertAfter(TheCall);
|
||||
}
|
||||
}
|
||||
|
||||
Function *CodeExtractor::extractCodeRegion() {
|
||||
if (!isEligible())
|
||||
return nullptr;
|
||||
|
@ -1389,7 +1393,8 @@ Function *CodeExtractor::extractCodeRegion() {
|
|||
// Replicate the effects of any lifetime start/end markers which referenced
|
||||
// input objects in the extraction region by placing markers around the call.
|
||||
insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
|
||||
InputObjectsWithLifetime, TheCall);
|
||||
InputObjectsWithLifetime.getArrayRef(),
|
||||
TheCall);
|
||||
|
||||
// Propagate personality info to the new function if there is one.
|
||||
if (oldFunction->hasPersonalityFn())
|
||||
|
|
|
@ -23,7 +23,8 @@ if.end: ; preds = %if.then, %entry
|
|||
|
||||
; CHECK-LABEL: @caller
|
||||
; CHECK: codeRepl.i:
|
||||
; CHECK-NEXT: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]]
|
||||
; CHECK-NOT: br label
|
||||
; CHECK: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]]
|
||||
define i32 @caller(i32 %v) !dbg !8 {
|
||||
entry:
|
||||
%call = call i32 @callee(i32 %v), !dbg !14
|
||||
|
@ -53,7 +54,8 @@ if.end:
|
|||
|
||||
; CHECK-LABEL: @caller2
|
||||
; CHECK: codeRepl.i:
|
||||
; CHECK-NEXT: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]]
|
||||
; CHECK-NOT: br label
|
||||
; CHECK: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]]
|
||||
define i32 @caller2(i32 %v) !dbg !21 {
|
||||
entry:
|
||||
%call = call i32 @callee2(i32 %v), !dbg !22
|
||||
|
|
|
@ -26,7 +26,11 @@ bb5: ; preds = %bb4, %bb1, %bb
|
|||
; CHECK-LABEL: bb:
|
||||
; CHECK-NEXT: [[CALL26LOC:%.*]] = alloca i8*
|
||||
; CHECK-LABEL: codeRepl.i:
|
||||
; CHECK-NEXT: %lt.cast.i = bitcast i8** [[CALL26LOC]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast.i)
|
||||
; CHECK-NEXT: call void @bar.1.bb1(i8** [[CALL26LOC]])
|
||||
; CHECK-NEXT: %call26.reload.i = load i8*, i8** [[CALL26LOC]]
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast.i)
|
||||
define i8* @dummy_caller(i32 %arg) {
|
||||
bb:
|
||||
%tmp = tail call i8* @bar(i32 %arg)
|
||||
|
|
|
@ -19,7 +19,8 @@ if.end: ; preds = %if.then, %entry
|
|||
|
||||
; CHECK-LABEL: @caller
|
||||
; CHECK: codeRepl.i:
|
||||
; CHECK-NEXT: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]]
|
||||
; CHECK-NOT: br label
|
||||
; CHECK: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]]
|
||||
define i32 @caller(i32 %v) !dbg !8 {
|
||||
entry:
|
||||
%call = call i32 (i32, ...) @callee(i32 %v, i32 99), !dbg !14
|
||||
|
|
|
@ -15,7 +15,7 @@ declare void @sink() cold
|
|||
; CHECK: call {{.*}}@sideeffect(
|
||||
; CHECK: call {{.*}}@realloc(
|
||||
; CHECK-LABEL: codeRepl:
|
||||
; CHECK-NEXT: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc)
|
||||
; CHECK: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc)
|
||||
; CHECK-LABEL: cleanup:
|
||||
; CHECK-NEXT: phi i8* [ null, %if.then ], [ %call, %if.end ], [ %retval.0.ce.reload, %codeRepl ]
|
||||
define i8* @realloc2(i8* %ptr, i64 %size) {
|
||||
|
|
|
@ -30,13 +30,13 @@ normalPath:
|
|||
|
||||
; CHECK-LABEL: codeRepl:
|
||||
; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8*
|
||||
; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]])
|
||||
; CHECK: [[local2_cast:%.*]] = bitcast i256* %local2 to i8*
|
||||
; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]])
|
||||
; CHECK: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast)
|
||||
; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]])
|
||||
; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]])
|
||||
; CHECK: br i1
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]])
|
||||
; CHECK-NEXT: [[local2_cast:%.*]] = bitcast i256* %local2 to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]])
|
||||
; CHECK-NEXT: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast)
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]])
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]])
|
||||
; CHECK-NEXT: br i1
|
||||
|
||||
outlinedPath:
|
||||
; These two uses of stack slots are overlapping. This should prevent
|
||||
|
|
|
@ -12,8 +12,11 @@ target triple = "x86_64-apple-macosx10.14.0"
|
|||
; CHECK-NEXT: ]
|
||||
;
|
||||
; CHECK: codeRepl:
|
||||
; CHECK-NEXT: bitcast
|
||||
; CHECK-NEXT: lifetime.start
|
||||
; CHECK-NEXT: call void @pluto.cold.1(i1* %tmp8.ce.loc)
|
||||
; CHECK-NEXT: %tmp8.ce.reload = load i1, i1* %tmp8.ce.loc
|
||||
; CHECK-NEXT: lifetime.end
|
||||
; CHECK-NEXT: br label %bb7
|
||||
;
|
||||
; CHECK: bb7:
|
||||
|
|
Loading…
Reference in New Issue