From 360abb7ee56f1524b6e2951a4fda36296d5b3582 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Wed, 15 Jan 2020 11:26:34 -0800 Subject: [PATCH] [CodeExtractor] Transfer debug info to extracted function After extracting, fix up debug info in both the old and new functions by 1) Pointing line locations and debug intrinsics to the new subprogram scope, and 2) Deleting intrinsics which point to values outside of the new function. Depends on https://reviews.llvm.org/D72795. Testing: check-llvm, check-clang, a build of LNT in the `-Os -g` config with "-mllvm -hot-cold-split=1" set, and end-to-end debugging of a toy program which undergoes splitting to verify that lldb can find variables, single step, etc. in extracted code. rdar://45507940 Differential Revision: https://reviews.llvm.org/D72801 --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 147 +++++++++++++++--- .../HotColdSplit/split-out-dbg-label.ll | 55 +++++++ .../HotColdSplit/transfer-debug-info.ll | 77 +++++++++ .../update-split-loop-metadata.ll | 58 +++++++ 4 files changed, 317 insertions(+), 20 deletions(-) create mode 100644 llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll create mode 100644 llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll create mode 100644 llvm/test/Transforms/HotColdSplit/update-split-loop-metadata.ll diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 682af4a88d3e..938b72e531ec 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -31,11 +31,14 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -1382,6 +1385,129 @@ void CodeExtractor::calculateNewCallTerminatorWeights( MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); } +/// Erase debug info intrinsics which refer to values in \p F but aren't in +/// \p F. +static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) { + for (Instruction &I : instructions(F)) { + SmallVector DbgUsers; + findDbgUsers(DbgUsers, &I); + for (DbgVariableIntrinsic *DVI : DbgUsers) + if (DVI->getFunction() != &F) + DVI->eraseFromParent(); + } +} + +/// Fix up the debug info in the old and new functions by pointing line +/// locations and debug intrinsics to the new subprogram scope, and by deleting +/// intrinsics which point to values outside of the new function. +static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, + CallInst &TheCall) { + DISubprogram *OldSP = OldFunc.getSubprogram(); + LLVMContext &Ctx = OldFunc.getContext(); + + // See llvm.org/PR44560, OpenMP passes an invalid subprogram to CodeExtractor. + bool NeedWorkaroundForOpenMPIRBuilderBug = + OldSP && OldSP->getRetainedNodes()->isTemporary(); + + if (!OldSP || NeedWorkaroundForOpenMPIRBuilderBug) { + // Erase any debug info the new function contains. + stripDebugInfo(NewFunc); + // Make sure the old function doesn't contain any non-local metadata refs. + eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); + return; + } + + // Create a subprogram for the new function. Leave out a description of the + // function arguments, as the parameters don't correspond to anything at the + // source level. + assert(OldSP->getUnit() && "Missing compile unit for subprogram"); + DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolvedNodes=*/false, + OldSP->getUnit()); + auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); + DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | + DISubprogram::SPFlagOptimized | + DISubprogram::SPFlagLocalToUnit; + auto NewSP = DIB.createFunction( + OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(), + /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags); + NewFunc.setSubprogram(NewSP); + + // Debug intrinsics in the new function need to be updated in one of two + // ways: + // 1) They need to be deleted, because they describe a value in the old + // function. + // 2) They need to point to fresh metadata, e.g. because they currently + // point to a variable in the wrong scope. + SmallDenseMap RemappedMetadata; + SmallVector DebugIntrinsicsToDelete; + for (Instruction &I : instructions(NewFunc)) { + auto *DII = dyn_cast(&I); + if (!DII) + continue; + + // Point the intrinsic to a fresh label within the new function. + if (auto *DLI = dyn_cast(&I)) { + DILabel *OldLabel = DLI->getLabel(); + DINode *&NewLabel = RemappedMetadata[OldLabel]; + if (!NewLabel) + NewLabel = DILabel::get(Ctx, NewSP, OldLabel->getName(), + OldLabel->getFile(), OldLabel->getLine()); + DLI->setArgOperand(0, MetadataAsValue::get(Ctx, NewLabel)); + continue; + } + + // If the location isn't a constant or an instruction, delete the + // intrinsic. + auto *DVI = cast(DII); + Value *Location = DVI->getVariableLocation(); + if (!Location || + (!isa(Location) && !isa(Location))) { + DebugIntrinsicsToDelete.push_back(DVI); + continue; + } + + // If the variable location is an instruction but isn't in the new + // function, delete the intrinsic. + Instruction *LocationInst = dyn_cast(Location); + if (LocationInst && LocationInst->getFunction() != &NewFunc) { + DebugIntrinsicsToDelete.push_back(DVI); + continue; + } + + // Point the intrinsic to a fresh variable within the new function. + DILocalVariable *OldVar = DVI->getVariable(); + DINode *&NewVar = RemappedMetadata[OldVar]; + if (!NewVar) + NewVar = DIB.createAutoVariable( + NewSP, OldVar->getName(), OldVar->getFile(), OldVar->getLine(), + OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero, + OldVar->getAlignInBits()); + DVI->setArgOperand(1, MetadataAsValue::get(Ctx, NewVar)); + } + for (auto *DII : DebugIntrinsicsToDelete) + DII->eraseFromParent(); + DIB.finalizeSubprogram(NewSP); + + // Fix up the scope information attached to the line locations in the new + // function. + for (Instruction &I : instructions(NewFunc)) { + if (const DebugLoc &DL = I.getDebugLoc()) + I.setDebugLoc(DebugLoc::get(DL.getLine(), DL.getCol(), NewSP)); + + // Loop info metadata may contain line locations. Fix them up. + auto updateLoopInfoLoc = [&Ctx, + NewSP](const DILocation &Loc) -> DILocation * { + return DILocation::get(Ctx, Loc.getLine(), Loc.getColumn(), NewSP, + nullptr); + }; + updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); + } + if (!TheCall.getDebugLoc()) + TheCall.setDebugLoc(DebugLoc::get(0, 0, OldSP)); + + eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); +} + Function * CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { if (!isEligible()) @@ -1567,26 +1693,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { } } - // Erase debug info intrinsics. Variable updates within the new function are - // invisible to debuggers. This could be improved by defining a DISubprogram - // for the new function. - for (BasicBlock &BB : *newFunction) { - auto BlockIt = BB.begin(); - // Remove debug info intrinsics from the new function. - while (BlockIt != BB.end()) { - Instruction *Inst = &*BlockIt; - ++BlockIt; - if (isa(Inst)) - Inst->eraseFromParent(); - } - // Remove debug info intrinsics which refer to values in the new function - // from the old function. - SmallVector DbgUsers; - for (Instruction &I : BB) - findDbgUsers(DbgUsers, &I); - for (DbgVariableIntrinsic *DVI : DbgUsers) - DVI->eraseFromParent(); - } + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); // Mark the new function `noreturn` if applicable. Terminators which resume // exception propagation are treated as returning instructions. This is to diff --git a/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll b/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll new file mode 100644 index 000000000000..3e2ed2762710 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll @@ -0,0 +1,55 @@ +; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s + +; When an llvm.dbg.label intrinsic is extracted into a new function, make sure +; that its metadata argument is a DILabel that points to a scope within the new +; function. +; +; In this example, the label "bye" points to the scope for @foo before +; splitting, and should point to the scope for @foo.cold.1 after splitting. + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; CHECK-LABEL: define {{.*}}@foo.cold.1 +; CHECK: llvm.dbg.label(metadata [[LABEL:![0-9]+]]), !dbg [[LINE:![0-9]+]] + +; CHECK: [[FILE:![0-9]+]] = !DIFile +; CHECK: [[SCOPE:![0-9]+]] = distinct !DISubprogram(name: "foo.cold.1" +; CHECK: [[LINE]] = !DILocation(line: 1, column: 1, scope: [[SCOPE]] +; CHECK: [[LABEL]] = !DILabel(scope: [[SCOPE]], name: "bye", file: [[FILE]], line: 28 + +define void @foo(i32 %arg1) !dbg !6 { +entry: + %var = add i32 0, 0, !dbg !11 + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + call void @llvm.dbg.label(metadata !12), !dbg !11 + call void @sink() + ret void +} + +declare void @llvm.dbg.label(metadata) + +declare void @sink() cold + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !4} +!llvm.module.flags = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "", directory: "/") +!2 = !{} +!3 = !{i32 7} +!4 = !{i32 1} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8) +!7 = !DISubroutineType(types: !2) +!8 = !{!9} +!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10) +!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +!11 = !DILocation(line: 1, column: 1, scope: !6) +!12 = !DILabel(scope: !6, name: "bye", file: !1, line: 28) diff --git a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll new file mode 100644 index 000000000000..d28f46a3c9f1 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll @@ -0,0 +1,77 @@ +; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; The block "if.end" in @foo is extracted into a new function, @foo.cold.1. +; Check the following: + +; CHECK-LABEL: define {{.*}}@foo.cold.1 + +; - The llvm.dbg.value intrinsic pointing to an argument in @foo (%arg1) is +; dropped +; CHECK-NOT: llvm.dbg.value + +; - Instructions without locations in the original function have no +; location in the new function +; CHECK: [[ADD1:%.*]] = add i32 %{{.*}}, 1{{$}} + +; - Ditto (see above), calls are not special +; CHECK-NEXT: call void @sink(i32 [[ADD1]]) + +; - Line locations are preserved +; CHECK-NEXT: call void @sink(i32 [[ADD1]]), !dbg [[LINE1:![0-9]+]] + +; - llvm.dbg.value intrinsics for values local to @foo.cold.1 are preserved +; CHECK-NEXT: llvm.dbg.value(metadata i32 [[ADD1]], metadata [[VAR1:![0-9]+]], metadata !DIExpression()), !dbg [[LINE1]] + +; - Expressions inside of dbg.value intrinsics are preserved +; CHECK-NEXT: llvm.dbg.value(metadata i32 [[ADD1]], metadata [[VAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_plus, DW_OP_stack_value) + +; - The DISubprogram for @foo.cold.1 has an empty DISubroutineType +; CHECK: [[FILE:![0-9]+]] = !DIFile(filename: "" +; CHECK: [[EMPTY_MD:![0-9]+]] = !{} +; CHECK: [[EMPTY_TYPE:![0-9]+]] = !DISubroutineType(types: [[EMPTY_MD]]) +; CHECK: [[NEWSCOPE:![0-9]+]] = distinct !DISubprogram(name: "foo.cold.1", linkageName: "foo.cold.1", scope: null, file: [[FILE]], type: [[EMPTY_TYPE]], spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized + +; - Line locations in @foo.cold.1 point to the new scope for @foo.cold.1 +; CHECK: [[LINE1]] = !DILocation(line: 1, column: 1, scope: [[NEWSCOPE]]) + +define void @foo(i32 %arg1) !dbg !6 { +entry: + %var = add i32 0, 0, !dbg !11 + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11 + %add1 = add i32 %arg1, 1 + call void @sink(i32 %add1) + call void @sink(i32 %add1), !dbg !11 + call void @llvm.dbg.value(metadata i32 %add1, metadata !9, metadata !DIExpression()), !dbg !11 + call void @llvm.dbg.value(metadata i32 %add1, metadata !9, metadata !DIExpression(DW_OP_constu, 1, DW_OP_plus, DW_OP_stack_value)), !dbg !11 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +declare void @sink(i32) cold + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !4} +!llvm.module.flags = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "", directory: "/") +!2 = !{} +!3 = !{i32 7} +!4 = !{i32 1} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8) +!7 = !DISubroutineType(types: !2) +!8 = !{!9} +!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10) +!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +!11 = !DILocation(line: 1, column: 1, scope: !6) diff --git a/llvm/test/Transforms/HotColdSplit/update-split-loop-metadata.ll b/llvm/test/Transforms/HotColdSplit/update-split-loop-metadata.ll new file mode 100644 index 000000000000..ccfe84fe7979 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/update-split-loop-metadata.ll @@ -0,0 +1,58 @@ +; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s + +; Check that llvm.loop metadata extracted by CodeExtractor is updated so that +; the debug locations it contains have the right scope. + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; CHECK-LABEL: define {{.*}}@basic.cold.1 +; CHECK: br i1 {{.*}}, !llvm.loop [[LOOP_MD:![0-9]+]] + +; The scope for these debug locations should be @basic.cold.1, not @basic. +; CHECK: [[SCOPE:![0-9]+]] = distinct !DISubprogram(name: "basic.cold.1" +; CHECK: [[LOOP_MD]] = distinct !{[[LOOP_MD]], [[LINE:![0-9]+]], [[LINE]]} +; CHECK: [[LINE]] = !DILocation(line: 1, column: 1, scope: [[SCOPE]]) + +define void @basic(i32* %p, i32 %k) !dbg !6 { +entry: + %cmp3 = icmp slt i32 0, %k + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1 + store i32 %i.05, i32* %p.addr.04, align 4 + %inc = add nsw i32 %i.05, 1 + call void @sink() + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !llvm.loop !10 + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + ret void +} + +declare void @sink() cold + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !4} +!llvm.module.flags = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "", directory: "/") +!2 = !{} +!3 = !{i32 22} +!4 = !{i32 12} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DISubprogram(name: "basic", linkageName: "basic", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8) +!7 = !DISubroutineType(types: !2) +!8 = !{} +!9 = !DILocation(line: 1, column: 1, scope: !6) +!10 = distinct !{!10, !9, !9}