Salvage debug info from instructions about to be deleted

This patch improves debug info quality in InstCombine by looking at
values that are about to be deleted, checking whether there are any
dbg.value instrinsics referring to them, and potentially encoding the
semantics of the deleted instruction into the dbg.value's
DIExpression.

In the example in the testcase (which was extracted from XNU) there is a sequence of

  %4 = load %struct.entry*, %struct.entry** %next2, align 8, !dbg !41
  %5 = bitcast %struct.entry* %4 to i8*, !dbg !42
  %add.ptr4 = getelementptr inbounds i8, i8* %5, i64 -8, !dbg !43
  %6 = bitcast i8* %add.ptr4 to %struct.entry*, !dbg !44
  call void @llvm.dbg.value(metadata %struct.entry* %6, i64 0, metadata !20, metadata !21), !dbg 34

When these instructions are eliminated by instcombine one after
another, we can still salvage the otherwise dead debug info:

- Bitcasts have no effect, so have the dbg.value point to operand(0)
- Loads can be expressed via a DW_OP_deref
- Constant gep instructions can be replaced by DWARF expression arithmetic

The API introduced by this patch is not specific to instcombine and
can be useful in other places, too.

rdar://problem/30725338

Differential Revision: https://reviews.llvm.org/D30919

llvm-svn: 297971
This commit is contained in:
Adrian Prantl 2017-03-16 18:22:52 +00:00
parent cb46a6baf7
commit 4377314a98
4 changed files with 191 additions and 37 deletions

View File

@ -282,8 +282,12 @@ bool LowerDbgDeclare(Function &F);
/// Finds the llvm.dbg.declare intrinsic corresponding to an alloca, if any.
DbgDeclareInst *FindAllocaDbgDeclare(Value *V);
/// Finds the llvm.dbg.value intrinsics describing a value, if any.
void findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V);
/// Finds the llvm.dbg.value intrinsics describing a value and invoke \p Action
/// on it.
void findDbgValues(Value *V, std::function<void(DbgValueInst &)> Action);
/// Constants for \p replaceDbgDeclare and friends.
enum { NoDeref = false, WithDeref = true };
/// Replaces llvm.dbg.declare instruction when the address it describes
/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is
@ -310,6 +314,11 @@ bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
void replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
DIBuilder &Builder, int Offset = 0);
/// Assuming the instruction \p I is going to be deleted, attempt to salvage any
/// dbg.value intrinsics referring to \p I by rewriting its effect into a
/// DIExpression.
void salvageDebugInfo(Instruction &I);
/// Remove all instructions from a basic block other than it's terminator
/// and any present EH pad instructions.
unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);

View File

@ -28,6 +28,9 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/IR/DIBuilder.h"
#define DEBUG_TYPE "instcombine"
@ -470,8 +473,9 @@ public:
/// methods should return the value returned by this function.
Instruction *eraseInstFromFunction(Instruction &I) {
DEBUG(dbgs() << "IC: ERASE " << I << '\n');
assert(I.use_empty() && "Cannot erase instruction that is used!");
salvageDebugInfo(I);
// Make sure that we reprocess all operands now that we reduced their
// use counts.
if (I.getNumOperands() < 8) {

View File

@ -1085,15 +1085,14 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
// Since we can't guarantee that the original dbg.declare instrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
SmallVector<DbgValueInst *, 1> DbgValues;
findDbgValues(DbgValues, APN);
for (auto *DVI : DbgValues) {
assert(DVI->getValue() == APN);
assert(DVI->getOffset() == 0);
if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
return true;
}
return false;
bool Found = false;
findDbgValues(APN, [&](DbgValueInst &DVI) {
assert(DVI.getValue() == APN);
assert(DVI.getOffset() == 0);
if ((DVI.getVariable() == DIVar) && (DVI.getExpression() == DIExpr))
Found = true;
});
return Found;
}
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
@ -1251,44 +1250,40 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return nullptr;
}
void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
void llvm::findDbgValues(Value *V, std::function<void(DbgValueInst &)> Action) {
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
DbgValues.push_back(DVI);
Action(*DVI);
}
static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) {
Expr.push_back(dwarf::DW_OP_deref);
}
static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) {
static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
if (Offset > 0) {
Expr.push_back(dwarf::DW_OP_plus);
Expr.push_back(Offset);
Ops.push_back(dwarf::DW_OP_plus);
Ops.push_back(Offset);
} else if (Offset < 0) {
Expr.push_back(dwarf::DW_OP_minus);
Expr.push_back(-Offset);
Ops.push_back(dwarf::DW_OP_minus);
Ops.push_back(-Offset);
}
}
static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder,
DIExpression *DIExpr, bool Deref,
int Offset) {
/// Prepend \p DIExpr with a deref and offset operation.
static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
bool Deref, int64_t Offset) {
if (!Deref && !Offset)
return DIExpr;
// Create a copy of the original DIDescriptor for user variable, prepending
// "deref" operation to a list of address elements, as new llvm.dbg.declare
// will take a value storing address of the memory for variable, not
// alloca itself.
SmallVector<uint64_t, 4> NewDIExpr;
SmallVector<uint64_t, 4> Ops;
if (Deref)
DIExprAddDeref(NewDIExpr);
DIExprAddOffset(NewDIExpr, Offset);
Ops.push_back(dwarf::DW_OP_deref);
appendOffset(Ops, Offset);
if (DIExpr)
NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
return Builder.createExpression(NewDIExpr);
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
return Builder.createExpression(Ops);
}
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
@ -1302,7 +1297,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset);
DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset);
// Insert llvm.dbg.declare immediately after the original alloca, and remove
// old llvm.dbg.declare.
@ -1334,11 +1329,11 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
// Insert the offset immediately after the first deref.
// We could just change the offset argument of dbg.value, but it's unsigned...
if (Offset) {
SmallVector<uint64_t, 4> NewDIExpr;
DIExprAddDeref(NewDIExpr);
DIExprAddOffset(NewDIExpr, Offset);
NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
DIExpr = Builder.createExpression(NewDIExpr);
SmallVector<uint64_t, 4> Ops;
Ops.push_back(dwarf::DW_OP_deref);
appendOffset(Ops, Offset);
Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
DIExpr = Builder.createExpression(Ops);
}
Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr,
@ -1357,6 +1352,46 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
}
}
void llvm::salvageDebugInfo(Instruction &I) {
auto MDWrap = [&](Value *V) {
return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V));
};
auto &M = *I.getModule();
if (auto *BitCast = dyn_cast<BitCastInst>(&I))
findDbgValues(&I, [&](DbgValueInst &DVI) {
// Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value
// to use the cast's source.
DVI.setOperand(0, MDWrap(I.getOperand(0)));
DEBUG(dbgs() << "SALVAGE: " << DVI << '\n');
});
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I))
findDbgValues(&I, [&](DbgValueInst &DVI) {
unsigned BitWidth =
M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
APInt Offset(BitWidth, 0);
// Rewrite a constant GEP into a DIExpression.
if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
auto *DIExpr = DVI.getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
// GEP offsets are i32 and thus alwaus fit into an int64_t.
DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue());
DVI.setOperand(0, MDWrap(I.getOperand(0)));
DVI.setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << DVI << '\n');
}
});
else if (auto *Load = dyn_cast<LoadInst>(&I))
findDbgValues(&I, [&](DbgValueInst &DVI) {
// Rewrite the load into DW_OP_deref.
auto *DIExpr = DVI.getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0);
DVI.setOperand(0, MDWrap(I.getOperand(0)));
DVI.setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << DVI << '\n');
});
}
unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
unsigned NumDeadInst = 0;
// Delete the instructions backwards, as it has a reduced likelihood of

View File

@ -0,0 +1,106 @@
; RUN: opt -instcombine %s -S -o - | FileCheck %s
; Verify that the eliminated instructions (bitcast, gep, load) are salvaged into
; a DIExpression.
;
; Originally created from the following C source and then heavily isolated/reduced.
;
; struct entry {
; struct entry *next;
; };
; void scan(struct entry *queue, struct entry *end)
; {
; struct entry *entry;
; for (entry = (struct entry *)((char *)(queue->next) - 8);
; &entry->next == end;
; entry = (struct entry *)((char *)(entry->next) - 8)) {
; }
; }
; ModuleID = '<stdin>'
source_filename = "test.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
%struct.entry = type { %struct.entry* }
; Function Attrs: nounwind ssp uwtable
define void @salvage_load(%struct.entry** %queue) local_unnamed_addr #0 !dbg !14 {
entry:
%im_not_dead = alloca %struct.entry*
%0 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
%1 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
call void @llvm.dbg.value(metadata %struct.entry* %1, i64 0, metadata !18, metadata !20), !dbg !19
; CHECK: define void @salvage_load
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry** %queue, i64 0,
; CHECK-SAME: metadata ![[LOAD_EXPR:[0-9]+]])
store %struct.entry* %1, %struct.entry** %im_not_dead, align 8
ret void, !dbg !21
}
; Function Attrs: nounwind ssp uwtable
define void @salvage_bitcast(%struct.entry* %queue) local_unnamed_addr #0 !dbg !14 {
entry:
%im_not_dead = alloca i8*
%0 = bitcast %struct.entry* %queue to i8*, !dbg !19
%1 = bitcast %struct.entry* %queue to i8*, !dbg !19
call void @llvm.dbg.value(metadata i8* %1, i64 0, metadata !18, metadata !20), !dbg !19
; CHECK: define void @salvage_bitcast
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0,
; CHECK-SAME: metadata ![[BITCAST_EXPR:[0-9]+]])
store i8* %1, i8** %im_not_dead, align 8
ret void, !dbg !21
}
; Function Attrs: nounwind ssp uwtable
define void @salvage_gep(%struct.entry* %queue, %struct.entry* %end) local_unnamed_addr #0 !dbg !14 {
entry:
%im_not_dead = alloca %struct.entry**
%0 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19
%1 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !19
call void @llvm.dbg.value(metadata %struct.entry** %1, i64 0, metadata !18, metadata !20), !dbg !19
; CHECK: define void @salvage_gep
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue, i64 0,
; CHECK-SAME: metadata ![[GEP_EXPR:[0-9]+]])
store %struct.entry** %1, %struct.entry*** %im_not_dead, align 8
ret void, !dbg !21
}
; CHECK: ![[LOAD_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_plus, 0)
; CHECK: ![[BITCAST_EXPR]] = !DIExpression(DW_OP_plus, 0)
; CHECK: ![[GEP_EXPR]] = !DIExpression(DW_OP_minus, 8, DW_OP_plus, 0)
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
attributes #0 = { nounwind ssp uwtable }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10, !11, !12}
!llvm.ident = !{!13}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
!1 = !DIFile(filename: "test.c", directory: "/")
!2 = !{}
!3 = !{!4, !8}
!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "entry", file: !1, line: 1, size: 64, elements: !6)
!6 = !{!7}
!7 = !DIDerivedType(tag: DW_TAG_member, name: "next", scope: !5, file: !1, line: 2, baseType: !4, size: 64)
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
!9 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{i32 1, !"PIC Level", i32 2}
!13 = !{!"clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)"}
!14 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !17)
!15 = !DISubroutineType(types: !16)
!16 = !{null, !4, !4}
!17 = !{!18}
!18 = !DILocalVariable(name: "entry", scope: !14, file: !1, line: 6, type: !4)
!19 = !DILocation(line: 6, column: 17, scope: !14)
!20 = !DIExpression(DW_OP_plus, 0)
!21 = !DILocation(line: 11, column: 1, scope: !14)