[CodeExtractor] Emit lifetime markers around reloads of outputs

CodeExtractor permits extracting a region of blocks from a function even
when values defined within the region are used outside of it.

This is typically done by creating an alloca in the original function
and reloading the alloca after a call to the extracted function.

Wrap the reload in lifetime start/end markers to promote stack coloring.

Suggested by Sergei Kachkov!

Differential Revision: https://reviews.llvm.org/D56045

llvm-svn: 351621
This commit is contained in:
Vedant Kumar 2019-01-19 02:37:59 +00:00
parent c089fbbaf2
commit 17d9f14bff
7 changed files with 92 additions and 77 deletions

View File

@ -880,6 +880,71 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
return newFunction;
}
/// Remove lifetime start/end markers found inside the extraction region when
/// their memory operand refers to an input, i.e. an object that is neither a
/// sunk alloca nor defined within the region.
///
/// \p Blocks is the set of blocks forming the extraction region. \p SunkAllocas
/// is the set of allocas which will be moved from the caller function into the
/// extracted function. Markers on sunk allocas, or on objects otherwise
/// defined in the region, are left untouched.
///
/// \returns the input objects whose lifetime markers were erased.
static SetVector<Value *>
eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
                             const SetVector<Value *> &SunkAllocas) {
  SetVector<Value *> MarkedInputs;

  for (BasicBlock *Block : Blocks) {
    auto Cursor = Block->begin();
    auto BlockEnd = Block->end();

    while (Cursor != BlockEnd) {
      // Step past the current instruction before it is possibly erased, so
      // the cursor never refers to a deleted instruction.
      Instruction &Current = *Cursor;
      ++Cursor;

      auto *Marker = dyn_cast<IntrinsicInst>(&Current);
      if (Marker == nullptr || !Marker->isLifetimeStartOrEnd())
        continue;

      // Operand 1 is the memory operand of the lifetime marker; in-bounds
      // offsets are stripped before the object is classified.
      Value *Object = Marker->getOperand(1)->stripInBoundsOffsets();

      // Only markers on inputs are erased. A marker whose object is a sunk
      // alloca, or is defined in the extraction region, must be kept.
      const bool IsInput =
          !SunkAllocas.count(Object) && !definedInRegion(Blocks, Object);
      if (!IsInput)
        continue;

      MarkedInputs.insert(Object);
      Marker->eraseFromParent();
    }
  }

  return MarkedInputs;
}
/// Bracket the call to the extracted function with lifetime markers for every
/// object in \p Objects, all of which must be defined in the caller.
///
/// For each object a lifetime.start marker is inserted directly before
/// \p TheCall, and a lifetime.end marker is inserted directly before the
/// terminator of the block containing \p TheCall. Objects which are not
/// already of type i8* are first cast to i8* ("lt.cast") ahead of the call.
/// Every marker is created with an i64 size operand of -1.
///
/// \p M is the module in which the lifetime intrinsics are declared. Nothing
/// is emitted when \p Objects is empty.
static void insertLifetimeMarkersSurroundingCall(Module *M,
                                                 ArrayRef<Value *> Objects,
                                                 CallInst *TheCall) {
  if (Objects.empty())
    return;

  LLVMContext &Context = M->getContext();
  auto *BytePtrTy = Type::getInt8PtrTy(Context);
  auto *MinusOne = ConstantInt::getSigned(Type::getInt64Ty(Context), -1);
  auto *LifetimeStart = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::lifetime_start, BytePtrTy);
  auto *LifetimeEnd = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::lifetime_end, BytePtrTy);

  // End markers go in front of the terminator of the block holding the call.
  Instruction *BlockTerm = TheCall->getParent()->getTerminator();

  for (Value *Object : Objects) {
    assert((!isa<Instruction>(Object) ||
            cast<Instruction>(Object)->getFunction() ==
                TheCall->getFunction()) &&
           "Input memory not defined in original function");

    // The markers take an i8*; reuse the object when it already has that
    // type, otherwise emit a pointer cast ahead of the call.
    Value *BytePtr = Object;
    if (Object->getType() != BytePtrTy)
      BytePtr =
          CastInst::CreatePointerCast(Object, BytePtrTy, "lt.cast", TheCall);

    CallInst::Create(LifetimeStart, {MinusOne, BytePtr})->insertBefore(TheCall);
    CallInst::Create(LifetimeEnd, {MinusOne, BytePtr})->insertBefore(BlockTerm);
  }
}
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
@ -1119,6 +1184,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
break;
}
// Insert lifetime markers around the reloads of any output values. The
// allocas that hold output values are only in use in the codeRepl block.
insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, call);
return call;
}
@ -1181,71 +1250,6 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
/// Walk every instruction of the extraction region and erase the lifetime
/// start/end markers which reference inputs. Collect the referenced inputs.
///
/// The region is described by its blocks (\p Blocks) together with the allocas
/// which will be moved from the caller function into the extracted function
/// (\p SunkAllocas). A marker is kept when its underlying object is a sunk
/// alloca or is otherwise defined in the extraction region.
///
/// \returns the input objects whose lifetime markers were erased.
static SetVector<Value *>
eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
                             const SetVector<Value *> &SunkAllocas) {
  SetVector<Value *> MarkedInputs;

  for (BasicBlock *Block : Blocks) {
    auto Cursor = Block->begin();
    auto BlockEnd = Block->end();

    while (Cursor != BlockEnd) {
      // Advance first: the instruction under inspection may be erased below.
      Instruction &Current = *Cursor;
      ++Cursor;

      auto *Marker = dyn_cast<IntrinsicInst>(&Current);
      if (Marker == nullptr || !Marker->isLifetimeStartOrEnd())
        continue;

      // Operand 1 is the memory operand of the lifetime marker; in-bounds
      // offsets are stripped before the object is classified.
      Value *Object = Marker->getOperand(1)->stripInBoundsOffsets();

      // Markers on sunk allocas, or on objects defined in the extraction
      // region, must not be erased.
      const bool IsInput =
          !SunkAllocas.count(Object) && !definedInRegion(Blocks, Object);
      if (!IsInput)
        continue;

      MarkedInputs.insert(Object);
      Marker->eraseFromParent();
    }
  }

  return MarkedInputs;
}
/// Bracket the call to the extracted function with lifetime markers for every
/// object in \p InputObjectsWithLifetime, all of which must be defined in the
/// caller.
///
/// For each object a lifetime.start marker is inserted directly before
/// \p TheCall and a lifetime.end marker is inserted directly after it. Objects
/// which are not already of type i8* are first cast to i8* ("lt.cast") ahead
/// of the call. Every marker is created with an i64 size operand of -1.
///
/// \p M is the module in which the lifetime intrinsics are declared. Nothing
/// is emitted when the set of objects is empty.
static void insertLifetimeMarkersSurroundingCall(
    Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
    CallInst *TheCall) {
  if (InputObjectsWithLifetime.empty())
    return;

  LLVMContext &Context = M->getContext();
  auto *BytePtrTy = Type::getInt8PtrTy(Context);
  auto *MinusOne = ConstantInt::getSigned(Type::getInt64Ty(Context), -1);
  auto *StartDecl = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::lifetime_start, BytePtrTy);
  auto *EndDecl = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::lifetime_end, BytePtrTy);

  for (Value *Object : InputObjectsWithLifetime) {
    assert((!isa<Instruction>(Object) ||
            cast<Instruction>(Object)->getFunction() ==
                TheCall->getFunction()) &&
           "Input memory not defined in original function");

    // The markers take an i8*; reuse the object when it already has that
    // type, otherwise emit a pointer cast ahead of the call.
    Value *BytePtr = Object;
    if (Object->getType() != BytePtrTy)
      BytePtr =
          CastInst::CreatePointerCast(Object, BytePtrTy, "lt.cast", TheCall);

    CallInst::Create(StartDecl, {MinusOne, BytePtr})->insertBefore(TheCall);
    // Each end marker is placed immediately after the call itself.
    CallInst::Create(EndDecl, {MinusOne, BytePtr})->insertAfter(TheCall);
  }
}
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
@ -1389,7 +1393,8 @@ Function *CodeExtractor::extractCodeRegion() {
// Replicate the effects of any lifetime start/end markers which referenced
// input objects in the extraction region by placing markers around the call.
insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
InputObjectsWithLifetime, TheCall);
InputObjectsWithLifetime.getArrayRef(),
TheCall);
// Propagate personality info to the new function if there is one.
if (oldFunction->hasPersonalityFn())

View File

@ -23,7 +23,8 @@ if.end: ; preds = %if.then, %entry
; CHECK-LABEL: @caller
; CHECK: codeRepl.i:
; CHECK-NEXT: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]]
; CHECK-NOT: br label
; CHECK: call void @callee.2.if.then(i32 %v, i32* %mul.loc.i), !dbg ![[DBG2:[0-9]+]]
define i32 @caller(i32 %v) !dbg !8 {
entry:
%call = call i32 @callee(i32 %v), !dbg !14
@ -53,7 +54,8 @@ if.end:
; CHECK-LABEL: @caller2
; CHECK: codeRepl.i:
; CHECK-NEXT: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]]
; CHECK-NOT: br label
; CHECK: call void @callee2.1.if.then(i32 %v, i32* %sub.loc.i), !dbg ![[DBG4:[0-9]+]]
define i32 @caller2(i32 %v) !dbg !21 {
entry:
%call = call i32 @callee2(i32 %v), !dbg !22

View File

@ -26,7 +26,11 @@ bb5: ; preds = %bb4, %bb1, %bb
; CHECK-LABEL: bb:
; CHECK-NEXT: [[CALL26LOC:%.*]] = alloca i8*
; CHECK-LABEL: codeRepl.i:
; CHECK-NEXT: %lt.cast.i = bitcast i8** [[CALL26LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %lt.cast.i)
; CHECK-NEXT: call void @bar.1.bb1(i8** [[CALL26LOC]])
; CHECK-NEXT: %call26.reload.i = load i8*, i8** [[CALL26LOC]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %lt.cast.i)
define i8* @dummy_caller(i32 %arg) {
bb:
%tmp = tail call i8* @bar(i32 %arg)

View File

@ -19,7 +19,8 @@ if.end: ; preds = %if.then, %entry
; CHECK-LABEL: @caller
; CHECK: codeRepl.i:
; CHECK-NEXT: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]]
; CHECK-NOT: br label
; CHECK: call void (i32, i32*, ...) @callee.1.if.then(i32 %v, i32* %mul.loc.i, i32 99), !dbg ![[DBG2:[0-9]+]]
define i32 @caller(i32 %v) !dbg !8 {
entry:
%call = call i32 (i32, ...) @callee(i32 %v, i32 99), !dbg !14

View File

@ -15,7 +15,7 @@ declare void @sink() cold
; CHECK: call {{.*}}@sideeffect(
; CHECK: call {{.*}}@realloc(
; CHECK-LABEL: codeRepl:
; CHECK-NEXT: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc)
; CHECK: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc)
; CHECK-LABEL: cleanup:
; CHECK-NEXT: phi i8* [ null, %if.then ], [ %call, %if.end ], [ %retval.0.ce.reload, %codeRepl ]
define i8* @realloc2(i8* %ptr, i64 %size) {

View File

@ -30,13 +30,13 @@ normalPath:
; CHECK-LABEL: codeRepl:
; CHECK: [[local1_cast:%.*]] = bitcast i256* %local1 to i8*
; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]])
; CHECK: [[local2_cast:%.*]] = bitcast i256* %local2 to i8*
; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]])
; CHECK: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast)
; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]])
; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]])
; CHECK: br i1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local1_cast]])
; CHECK-NEXT: [[local2_cast:%.*]] = bitcast i256* %local2 to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[local2_cast]])
; CHECK-NEXT: call i1 @foo.cold.1(i8* %local1_cast, i8* %local2_cast)
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local1_cast]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[local2_cast]])
; CHECK-NEXT: br i1
outlinedPath:
; These two uses of stack slots are overlapping. This should prevent

View File

@ -12,8 +12,11 @@ target triple = "x86_64-apple-macosx10.14.0"
; CHECK-NEXT: ]
;
; CHECK: codeRepl:
; CHECK-NEXT: bitcast
; CHECK-NEXT: lifetime.start
; CHECK-NEXT: call void @pluto.cold.1(i1* %tmp8.ce.loc)
; CHECK-NEXT: %tmp8.ce.reload = load i1, i1* %tmp8.ce.loc
; CHECK-NEXT: lifetime.end
; CHECK-NEXT: br label %bb7
;
; CHECK: bb7: