From 06a2128cfa56e0d4954d231b86f44fe04ae8db95 Mon Sep 17 00:00:00 2001 From: Taewook Oh Date: Mon, 13 Feb 2017 18:15:31 +0000 Subject: [PATCH] Make MachineBasicBlock::updateTerminator to update DebugLoc as well Summary: Currently MachineBasicBlock::updateTerminator simply drops DebugLoc for newly created branch instructions, which may cause incorrect stepping and/or imprecise sample profile data. Below is an example: ``` 1 extern int bar(int x); 2 3 int foo(int *begin, int *end) { 4 int *i; 5 int ret = 0; 6 for ( 7 i = begin ; 8 i != end ; 9 i++) 10 { 11 ret += bar(*i); 12 } 13 return ret; 14 } ``` Below is a bitcode of 'foo' at the end of LLVM-IR level optimizations with -O3: ``` define i32 @foo(i32* readonly %begin, i32* readnone %end) !dbg !4 { entry: %cmp6 = icmp eq i32* %begin, %end, !dbg !9 br i1 %cmp6, label %for.end, label %for.body.preheader, !dbg !12 for.body.preheader: ; preds = %entry br label %for.body, !dbg !13 for.body: ; preds = %for.body.preheader, %for.body %ret.08 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] %i.07 = phi i32* [ %incdec.ptr, %for.body ], [ %begin, %for.body.preheader ] %0 = load i32, i32* %i.07, align 4, !dbg !13, !tbaa !15 %call = tail call i32 @bar(i32 %0), !dbg !19 %add = add nsw i32 %call, %ret.08, !dbg !20 %incdec.ptr = getelementptr inbounds i32, i32* %i.07, i64 1, !dbg !21 %cmp = icmp eq i32* %incdec.ptr, %end, !dbg !9 br i1 %cmp, label %for.end.loopexit, label %for.body, !dbg !12, !llvm.loop !22 for.end.loopexit: ; preds = %for.body br label %for.end, !dbg !24 for.end: ; preds = %for.end.loopexit, %entry %ret.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ] ret i32 %ret.0.lcssa, !dbg !24 } ``` where ``` !12 = !DILocation(line: 6, column: 3, scope: !11) ``` . As you can see, the terminator of 'entry' block, which is a loop control branch, has a DebugLoc of line 6, column 3. Howerver, after the execution of 'MachineBlock::updateTerminator' function, which is triggered by MachineSinking pass, the DebugLoc info is dropped as below (see there's no debug-location for JNE_1): ``` bb.0.entry: successors: %bb.4(0x30000000), %bb.1.for.body.preheader(0x50000000) liveins: %rdi, %rsi %6 = COPY %rsi %5 = COPY %rdi %8 = SUB64rr %5, %6, implicit-def %eflags, debug-location !9 JNE_1 %bb.1.for.body.preheader, implicit %eflags ``` This patch addresses this issue and make newly created branch instructions to keep debug-location info. Reviewers: aprantl, MatzeB, craig.topper, qcolombet Reviewed By: qcolombet Subscribers: qcolombet, llvm-commits Differential Revision: https://reviews.llvm.org/D29596 llvm-svn: 294976 --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 4 + llvm/lib/CodeGen/MachineBasicBlock.cpp | 23 ++++- .../CodeGen/X86/update-terminator-debugloc.ll | 91 +++++++++++++++++++ 3 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/X86/update-terminator-debugloc.ll diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index f3f5e324d76a..ff9544c94ab0 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -664,6 +664,10 @@ public: return findDebugLoc(MBBI.getInstrIterator()); } + /// Find and return the merged DebugLoc of the branch instructions of the + /// block. Return UnknownLoc if there is none. + DebugLoc findBranchDebugLoc(); + /// Possible outcome of a register liveness query to computeRegisterLiveness() enum LivenessQueryResult { LQR_Live, ///< Register is known to be (at least partially) live. diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index a7d87799f679..2468bfa2a511 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -423,7 +424,7 @@ void MachineBasicBlock::updateTerminator() { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; - DebugLoc DL; // FIXME: this is nowhere + DebugLoc DL = findBranchDebugLoc(); bool B = TII->analyzeBranch(*this, TBB, FBB, Cond); (void) B; assert(!B && "UpdateTerminators requires analyzable predecessors!"); @@ -491,7 +492,7 @@ void MachineBasicBlock::updateTerminator() { // FIXME: This does not seem like a reasonable pattern to support, but it // has been seen in the wild coming out of degenerate ARM test cases. TII->removeBranch(*this); - + // Finally update the unconditional successor to be reached via a branch if // it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) @@ -1150,6 +1151,24 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { return {}; } +/// Find and return the merged DebugLoc of the branch instructions of the block. +/// Return UnknownLoc if there is none. +DebugLoc +MachineBasicBlock::findBranchDebugLoc() { + DebugLoc DL {}; + auto TI = getFirstTerminator(); + while (TI != end() && !TI->isBranch()) + ++TI; + + if (TI != end()) { + DL = TI->getDebugLoc(); + for (++TI ; TI != end() ; ++TI) + if (TI->isBranch()) + DL = DILocation::getMergedLocation(DL, TI->getDebugLoc()); + } + return DL; +} + /// Return probability of the edge from this block to MBB. BranchProbability MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const { diff --git a/llvm/test/CodeGen/X86/update-terminator-debugloc.ll b/llvm/test/CodeGen/X86/update-terminator-debugloc.ll new file mode 100644 index 000000000000..359c348b42cb --- /dev/null +++ b/llvm/test/CodeGen/X86/update-terminator-debugloc.ll @@ -0,0 +1,91 @@ +; RUN: llc -stop-after=machine-sink -march=x86-64 < %s | FileCheck %s +; +; test code: +; 1 extern int bar(int x); +; 2 +; 3 int foo(int *begin, int *end) { +; 4 int *i; +; 5 int ret = 0; +; 6 for ( +; 7 i = begin ; +; 8 i != end ; +; 9 i++) +; 10 { +; 11 ret += bar(*i); +; 12 } +; 13 return ret; +; 14 } +; +; With the test code, LLVM-IR below shows that loop-control branches have a +; debug location of line 6 (branches in entry and for.body block). Make sure that +; these debug locations are propaged correctly to lowered instructions. +; +; CHECK: [[DLOC:![0-9]+]] = !DILocation(line: 6 +; CHECK-DAG: [[VREG1:%[^ ]+]] = COPY %rsi +; CHECK-DAG: [[VREG2:%[^ ]+]] = COPY %rdi +; CHECK: SUB64rr [[VREG2]], [[VREG1]] +; CHECK-NEXT: JNE_1 {{.*}}, debug-location [[DLOC]]{{$}} +; CHECK: [[VREG3:%[^ ]+]] = PHI [[VREG2]] +; CHECK: [[VREG4:%[^ ]+]] = ADD64ri8 [[VREG3]], 4 +; CHECK: SUB64rr [[VREG1]], [[VREG4]] +; CHECK-NEXT: JNE_1 {{.*}}, debug-location [[DLOC]]{{$}} +; CHECK-NEXT: JMP_1 {{.*}}, debug-location [[DLOC]]{{$}} + +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32* readonly %begin, i32* readnone %end) !dbg !4 { +entry: + %cmp6 = icmp eq i32* %begin, %end, !dbg !9 + br i1 %cmp6, label %for.end, label %for.body.preheader, !dbg !12 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !13 + +for.body: ; preds = %for.body.preheader, %for.body + %ret.08 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %i.07 = phi i32* [ %incdec.ptr, %for.body ], [ %begin, %for.body.preheader ] + %0 = load i32, i32* %i.07, align 4, !dbg !13, !tbaa !15 + %call = tail call i32 @bar(i32 %0), !dbg !19 + %add = add nsw i32 %call, %ret.08, !dbg !20 + %incdec.ptr = getelementptr inbounds i32, i32* %i.07, i64 1, !dbg !21 + %cmp = icmp eq i32* %incdec.ptr, %end, !dbg !9 + br i1 %cmp, label %for.end.loopexit, label %for.body, !dbg !12, !llvm.loop !22 + +for.end.loopexit: ; preds = %for.body + br label %for.end, !dbg !24 + +for.end: ; preds = %for.end.loopexit, %entry + %ret.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ] + ret i32 %ret.0.lcssa, !dbg !24 +} + +declare i32 @bar(i32) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1) +!1 = !DIFile(filename: "foo.c", directory: "b/") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!5 = !DISubroutineType(types: !6) +!6 = !{!7, !8, !8} +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) +!9 = !DILocation(line: 8, column: 9, scope: !10) +!10 = distinct !DILexicalBlock(scope: !11, file: !1, line: 6, column: 3) +!11 = distinct !DILexicalBlock(scope: !4, file: !1, line: 6, column: 3) +!12 = !DILocation(line: 6, column: 3, scope: !11) +!13 = !DILocation(line: 11, column: 18, scope: !14) +!14 = distinct !DILexicalBlock(scope: !10, file: !1, line: 10, column: 3) +!15 = !{!16, !16, i64 0} +!16 = !{!"int", !17, i64 0} +!17 = !{!"omnipotent char", !18, i64 0} +!18 = !{!"Simple C/C++ TBAA"} +!19 = !DILocation(line: 11, column: 14, scope: !14) +!20 = !DILocation(line: 11, column: 11, scope: !14) +!21 = !DILocation(line: 9, column: 8, scope: !10) +!22 = distinct !{!22, !12, !23} +!23 = !DILocation(line: 12, column: 3, scope: !11) +!24 = !DILocation(line: 13, column: 3, scope: !4)