[PDB] Add an explain subcommand.

When investigating various things, we often have a file offset
and want to know what's in the PDB at that address.  For example
we may be doing a binary comparison of two LLD-generated PDBs
to look for sources of non-determinism, or we may wish to compare
an LLD-generated PDB with a Microsoft generated PDB for sources
of byte-for-byte incompatibility.  In these cases, we can do a
binary diff of the two files, and once we find a mismatched byte
we can use explain to figure out what that byte is, immediately
homing in on the problem.

This patch implements this by trying to narrow the meaning of
a particular file offset down as much as possible.

Differential Revision: https://reviews.llvm.org/D44959

llvm-svn: 328799
This commit is contained in:
Zachary Turner 2018-03-29 16:28:20 +00:00
parent c7cc87922e
commit ea40f40e1b
6 changed files with 371 additions and 0 deletions

View File

@ -0,0 +1,83 @@
; RUN: llvm-pdbutil explain -offset=0 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=ZERO %s
; RUN: llvm-pdbutil explain -offset=40 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=FORTY %s
; RUN: llvm-pdbutil explain -offset=60 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=SIXTY %s
; RUN: llvm-pdbutil explain -offset=0x1000 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=FPM1 %s
; RUN: llvm-pdbutil explain -offset=0x1100 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=EXTRANEOUSFPM %s
; RUN: llvm-pdbutil explain -offset=0x2000 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=FPM2 %s
; RUN: llvm-pdbutil explain -offset=0x3000 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=UNALLOCATED %s
; RUN: llvm-pdbutil explain -offset=0x7000 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=STREAM %s
; RUN: llvm-pdbutil explain -offset=0x1A000 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=STREAMDIR %s
; RUN: llvm-pdbutil explain -offset=0x1B000 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=DIRBLOCKLIST %s
; RUN: llvm-pdbutil explain -offset=0x1D000 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=INVALIDFILEOFFSET %s
; RUN: llvm-pdbutil explain -offset=0xA100 %p/Inputs/InjectedSource.pdb \
; RUN: | FileCheck --check-prefix=UNUSED %s
ZERO: Block:Offset = 0:0000.
ZERO-NEXT: Address is in block 0 (allocated).
ZERO-NEXT: This corresponds to offset 0 of MSF super block,
ZERO-NEXT: which is part of the MSF file magic.
FORTY: Block:Offset = 0:0028.
FORTY-NEXT: Address is in block 0 (allocated).
FORTY-NEXT: This corresponds to offset 40 of MSF super block,
FORTY-NEXT: which contains the number of bytes in the stream directory.
SIXTY: Block:Offset = 0:003C.
SIXTY-NEXT: Address is in block 0 (allocated).
SIXTY-NEXT: This corresponds to offset 60 of MSF super block,
SIXTY-NEXT: which is outside the range of valid data for the super block.
FPM1: Block:Offset = 1:0000.
FPM1-NEXT: Address is in block 1 (allocated).
FPM1-NEXT: Address is in FPM1 (Alt FPM)
FPM1-NEXT: Address describes the allocation status of blocks [0,8)
EXTRANEOUSFPM: Block:Offset = 1:0100.
EXTRANEOUSFPM-NEXT: Address is in block 1 (allocated).
EXTRANEOUSFPM-NEXT: Address is in FPM1 (Alt FPM)
EXTRANEOUSFPM-NEXT: Address is in extraneous FPM space.
FPM2: Block:Offset = 2:0000.
FPM2-NEXT: Address is in block 2 (allocated).
FPM2-NEXT: Address is in FPM2 (Main FPM)
FPM2-NEXT: Address describes the allocation status of blocks [0,8)
UNALLOCATED: Block:Offset = 3:0000.
UNALLOCATED-NEXT: Address is in block 3 (unallocated).
STREAM: Block:Offset = 7:0000.
STREAM-NEXT: Address is in block 7 (allocated).
STREAM-NEXT: Address is at offset 0/684 of Stream 12 (Module "* Linker *").
STREAMDIR: Block:Offset = 1A:0000.
STREAMDIR-NEXT: Address is in block 26 (allocated).
STREAMDIR-NEXT: Address is at offset 0/156 of Stream Directory.
DIRBLOCKLIST: Block:Offset = 1B:0000.
DIRBLOCKLIST-NEXT: Address is in block 27 (allocated).
DIRBLOCKLIST-NEXT: Address is at offset 0 of the directory block list
INVALIDFILEOFFSET: Address 118784 is not in the file (file size = 118784).
UNUSED: Block:Offset = A:0100.
UNUSED-NEXT: Address is in block 10 (allocated).
UNUSED-NEXT: Address is at offset 256/120 of Stream 11 (Section Header Data) in unused space.

View File

@ -12,6 +12,7 @@ add_llvm_tool(llvm-pdbutil
Analyze.cpp
BytesOutputStyle.cpp
DumpOutputStyle.cpp
ExplainOutputStyle.cpp
InputFile.cpp
llvm-pdbutil.cpp
FormatUtil.cpp

View File

@ -0,0 +1,200 @@
//===- ExplainOutputStyle.cpp --------------------------------- *- C++ --*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "ExplainOutputStyle.h"
#include "FormatUtil.h"
#include "StreamUtil.h"
#include "llvm-pdbutil.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::msf;
using namespace llvm::pdb;
/// Remember the file and the offset to explain, and split the offset into the
/// index of the block that contains it and the byte position inside that
/// block, based on the block size reported by \p File.  All output is routed
/// through a LinePrinter constructed on outs().
ExplainOutputStyle::ExplainOutputStyle(PDBFile &File, uint64_t FileOffset)
    : File(File), FileOffset(FileOffset),
      BlockIndex(FileOffset / File.getBlockSize()),
      // Remainder of the division above: the byte position within the block.
      OffsetInBlock(FileOffset % File.getBlockSize()),
      P(2, false, outs()) {}
/// Print an explanation of the file offset this object was constructed with.
///
/// A header line and the block status are always printed.  If the block is
/// allocated, the block is classified (super block, FPM, block map, stream
/// directory, regular stream, or unknown) and the matching explainer runs.
/// Always returns success.
Error ExplainOutputStyle::dump() {
  P.formatLine("Explaining file offset {0} of file '{1}'.", FileOffset,
               File.getFilePath());

  // explainBlockStatus() returns false when the address is outside the file
  // or its block is not allocated; there is nothing further to classify then.
  if (!explainBlockStatus())
    return Error::success();

  AutoIndent Indent(P);

  // The checks are ordered; the first classification that matches wins.
  if (isSuperBlock()) {
    explainSuperBlockOffset();
    return Error::success();
  }
  if (isFpmBlock()) {
    explainFpmBlockOffset();
    return Error::success();
  }
  if (isBlockMapBlock()) {
    explainBlockMapOffset();
    return Error::success();
  }
  if (isStreamDirectoryBlock()) {
    explainStreamDirectoryOffset();
    return Error::success();
  }
  if (auto StreamIndex = getBlockStreamIndex()) {
    explainStreamOffset(*StreamIndex);
    return Error::success();
  }

  explainUnknownBlock();
  return Error::success();
}
/// True when the explained offset lies in block 0, which dump() treats as the
/// MSF super block.
bool ExplainOutputStyle::isSuperBlock() const { return BlockIndex == 0; }
/// True when the block index is 1 past a multiple of the block size, i.e. the
/// block is one of the blocks reported as "FPM1".
bool ExplainOutputStyle::isFpm1() const {
  // Unsigned arithmetic: for block 0 this wraps around rather than going
  // negative, exactly like the direct expression would.
  const uint64_t RelativeToFpm1 = BlockIndex - 1;
  return RelativeToFpm1 % File.getBlockSize() == 0;
}
/// True when the block index is 2 past a multiple of the block size, i.e. the
/// block is one of the blocks reported as "FPM2".
bool ExplainOutputStyle::isFpm2() const {
  // Unsigned arithmetic: for blocks 0 and 1 this wraps around rather than
  // going negative, exactly like the direct expression would.
  const uint64_t RelativeToFpm2 = BlockIndex - 2;
  return RelativeToFpm2 % File.getBlockSize() == 0;
}
/// True when the explained block is either an FPM1 or an FPM2 block.
bool ExplainOutputStyle::isFpmBlock() const {
  if (isFpm1())
    return true;
  return isFpm2();
}
/// True when the explained block is the one the file reports as its block map
/// block.
bool ExplainOutputStyle::isBlockMapBlock() const {
  const auto BlockMapBlock = File.getBlockMapIndex();
  return BlockIndex == BlockMapBlock;
}
/// True when the explained block appears in the MSF layout's list of stream
/// directory blocks.
bool ExplainOutputStyle::isStreamDirectoryBlock() const {
  return llvm::is_contained(File.getMsfLayout().DirectoryBlocks, BlockIndex);
}
/// Find the stream that owns the explained block.
///
/// Walks the layout's stream map in order and returns the index of the first
/// stream whose block list contains the block, or None if no stream does.
Optional<uint32_t> ExplainOutputStyle::getBlockStreamIndex() const {
  const auto &StreamMap = File.getMsfLayout().StreamMap;
  for (const auto &Stream : enumerate(StreamMap))
    if (llvm::is_contained(Stream.value(), BlockIndex))
      return Stream.index();
  return None;
}
/// Print where the address falls in terms of blocks and whether that block is
/// allocated.
///
/// \returns true only if the address is inside the file and its block is not
/// marked in the layout's free page map; false otherwise.
bool ExplainOutputStyle::explainBlockStatus() {
  const bool InsideFile = FileOffset < File.getFileSize();
  if (!InsideFile) {
    P.formatLine("Address {0} is not in the file (file size = {1}).",
                 FileOffset, File.getFileSize());
    return false;
  }

  // The format string picks its arguments by index: block index first, then
  // the offset within the block; the raw file offset ({0}) is not printed.
  P.formatLine("Block:Offset = {2:X-}:{1:X-4}.", FileOffset, OffsetInBlock,
               BlockIndex);

  const bool MarkedFree = File.getMsfLayout().FreePageMap[BlockIndex];
  const bool IsAllocated = !MarkedFree;
  P.formatLine("Address is in block {0} ({1}allocated).", BlockIndex,
               IsAllocated ? "" : "un");
  return IsAllocated;
}
/// Describe which part of the MSF super block the explained byte belongs to.
///
/// The offset is compared against the start of the field that FOLLOWS the one
/// being described (or against the end of the struct for the last field), so
/// a match means the byte lies inside the field named in the message.
/// Comparing against offsetof() of the described field itself would label
/// every byte with the description of the next field and would leave the
/// bytes of BlockMapAddr without a description.
///
/// NOTE(review): assumes the SuperBlock members are declared in the order
/// used below and that BlockMapAddr is the last one -- confirm against the
/// struct definition.  Any lit expectation that recorded the old labels (for
/// example offset 40, which lies before NumDirectoryBytes and is therefore in
/// NumBlocks) has to be updated to match.
void ExplainOutputStyle::explainSuperBlockOffset() {
  P.formatLine("This corresponds to offset {0} of MSF super block, ",
               OffsetInBlock);
  if (OffsetInBlock < sizeof(msf::Magic))
    P.printLine("which is part of the MSF file magic.");
  else if (OffsetInBlock < offsetof(SuperBlock, FreeBlockMapBlock))
    P.printLine("which contains the block size of the file.");
  else if (OffsetInBlock < offsetof(SuperBlock, NumBlocks))
    P.printLine("which contains the index of the FPM block (e.g. 1 or 2).");
  else if (OffsetInBlock < offsetof(SuperBlock, NumDirectoryBytes))
    P.printLine("which contains the number of blocks in the file.");
  else if (OffsetInBlock < offsetof(SuperBlock, Unknown1))
    P.printLine("which contains the number of bytes in the stream directory.");
  else if (OffsetInBlock < offsetof(SuperBlock, BlockMapAddr))
    P.printLine("whose purpose is unknown.");
  else if (OffsetInBlock < sizeof(SuperBlock))
    P.printLine("which contains the file offset of the block map.");
  else
    // Everything from sizeof(SuperBlock) to the end of block 0, including the
    // first byte past the struct.
    P.printLine(
        "which is outside the range of valid data for the super block.");
}
/// Explain a byte that lies in a free page map (FPM) block.
///
/// Prints which FPM the block belongs to (1 or 2) and whether the layout
/// reports that one as the main or the alternate FPM, then either the range
/// of eight blocks whose allocation bits live in this byte, or a note that
/// the byte is beyond the bits needed for the file's blocks.
void ExplainOutputStyle::explainFpmBlockOffset() {
  const MSFLayout &Layout = File.getMsfLayout();
  uint32_t MainFpm = Layout.mainFpmBlock();
  uint32_t AltFpm = Layout.alternateFpmBlock();

  assert(isFpmBlock());
  uint32_t Fpm = isFpm1() ? 1 : 2;
  // Index of this FPM block among the FPM blocks of the same kind.
  uint32_t FpmChunk = BlockIndex / File.getBlockSize();
  assert((Fpm == MainFpm) || (Fpm == AltFpm));
  (void)AltFpm; // Only read by the assert above.
  bool IsMain = (Fpm == MainFpm);
  P.formatLine("Address is in FPM{0} ({1} FPM)", Fpm, IsMain ? "Main" : "Alt");

  // One bit per block, so the byte at this position covers eight blocks
  // starting at eight times its byte position within the FPM.
  uint32_t DescribedBlockStart =
      8 * (FpmChunk * File.getBlockSize() + OffsetInBlock);

  // Valid block indices are [0, getBlockCount()), so a byte whose first
  // described block equals the block count already covers no real block.
  if (DescribedBlockStart >= File.getBlockCount()) {
    P.printLine("Address is in extraneous FPM space.");
    return;
  }

  P.formatLine("Address describes the allocation status of blocks [{0},{1})",
               DescribedBlockStart, DescribedBlockStart + 8);
}
/// Return true if file offset \p Offset falls inside block number \p Block,
/// i.e. in the half-open byte range [Block * BlockSize, (Block + 1) *
/// BlockSize) for the block size reported by \p File.
static bool offsetIsInBlock(const PDBFile &File, uint64_t Offset,
                            uint32_t Block) {
  // Widen before multiplying so the product is computed in 64 bits.
  const uint64_t FirstByte = uint64_t(Block) * File.getBlockSize();
  const uint64_t PastLastByte = FirstByte + File.getBlockSize();
  if (Offset < FirstByte)
    return false;
  return Offset < PastLastByte;
}
/// Explain a byte that lies in the block map block by printing its position
/// relative to the file offset at which the block map starts.
void ExplainOutputStyle::explainBlockMapOffset() {
  assert(offsetIsInBlock(File, FileOffset, File.getBlockMapIndex()));

  // Distinct name so the local does not hide the OffsetInBlock member.
  uint32_t OffsetInBlockMap = FileOffset - File.getBlockMapOffset();
  P.formatLine("Address is at offset {0} of the directory block list",
               OffsetInBlockMap);
}
/// Translate a file offset into an offset within a stream.
///
/// \param StreamBlocks the stream's blocks, in stream order.
/// \param FileOffset   the file offset to translate; its block is expected
///                     to be one of \p StreamBlocks (asserted).
/// \param BlockSize    the block size of the file.
/// \returns the position of the block within \p StreamBlocks times the block
/// size, plus the byte position within that block.
static uint32_t getOffsetInStream(ArrayRef<support::ulittle32_t> StreamBlocks,
                                  uint64_t FileOffset, uint32_t BlockSize) {
  uint32_t ContainingBlock = FileOffset / BlockSize;
  uint32_t ByteInBlock = FileOffset - ContainingBlock * BlockSize;

  auto Found = llvm::find(StreamBlocks, ContainingBlock);
  assert(Found != StreamBlocks.end());

  // How many of the stream's blocks come before the one holding the offset.
  uint32_t PrecedingBlocks = std::distance(StreamBlocks.begin(), Found);
  return PrecedingBlocks * BlockSize + ByteInBlock;
}
/// Explain a byte that lies in a block owned by stream number \p Stream.
///
/// Prints the offset within the stream, the stream's length, its index and
/// its long name, and notes when the byte is past the stream's data (i.e. in
/// the slack of the stream's final block).
void ExplainOutputStyle::explainStreamOffset(uint32_t Stream) {
  SmallVector<StreamInfo, 12> Streams;
  discoverStreamPurposes(File, Streams);

  // Stream is used as an index below, so it must be strictly less than the
  // number of discovered streams.
  assert(Stream < Streams.size());
  const StreamInfo &S = Streams[Stream];
  const auto &Layout = File.getStreamLayout(Stream);
  uint32_t StreamOff =
      getOffsetInStream(Layout.Blocks, FileOffset, File.getBlockSize());

  // Stream data occupies offsets [0, Length); offset == Length is already the
  // first unused byte.
  P.formatLine("Address is at offset {0}/{1} of Stream {2} ({3}){4}.",
               StreamOff, Layout.Length, Stream, S.getLongName(),
               (StreamOff >= Layout.Length) ? " in unused space" : "");
}
/// Explain a byte that lies in one of the stream directory's blocks.
///
/// Prints the offset within the stream directory and the directory's size in
/// bytes as recorded in the super block, and notes when the byte is past the
/// directory's data.
void ExplainOutputStyle::explainStreamDirectoryOffset() {
  auto DirectoryBlocks = File.getDirectoryBlockArray();
  const auto &Layout = File.getMsfLayout();
  uint32_t StreamOff =
      getOffsetInStream(DirectoryBlocks, FileOffset, File.getBlockSize());

  // Convert the on-disk field to a plain integer once, so the comparison and
  // the printed value use the same number.
  uint32_t DirectorySize = Layout.SB->NumDirectoryBytes;

  // Directory data occupies offsets [0, DirectorySize); offset ==
  // DirectorySize is already the first unused byte.
  P.formatLine("Address is at offset {0}/{1} of Stream Directory{2}.",
               StreamOff, DirectorySize,
               (StreamOff >= DirectorySize) ? " in unused space" : "");
}
/// Fallback used by dump() when the block matched none of the known
/// classifications: prints that the purpose of the address is unknown.
void ExplainOutputStyle::explainUnknownBlock() {
P.formatLine("Address has unknown purpose.");
}

View File

@ -0,0 +1,59 @@
//===- ExplainOutputStyle.h ----------------------------------- *- C++ --*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H
#define LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H
#include "LinePrinter.h"
#include "OutputStyle.h"
#include <string>
namespace llvm {
namespace pdb {
class PDBFile;
/// Output style that explains what a single file offset of a PDB file refers
/// to (super block, free page map, block map, stream directory, or a stream).
class ExplainOutputStyle : public OutputStyle {
public:
  /// \param File       the PDB file being inspected.
  /// \param FileOffset the byte offset within \p File to explain.
  ExplainOutputStyle(PDBFile &File, uint64_t FileOffset);

  /// Print the explanation.
  Error dump() override;

private:
  // Classification of the block that contains the offset.
  bool isSuperBlock() const;
  bool isFpm1() const;
  bool isFpm2() const;
  bool isFpmBlock() const;
  bool isBlockMapBlock() const;
  bool isStreamDirectoryBlock() const;
  /// Index of the stream owning the block, if any.
  Optional<uint32_t> getBlockStreamIndex() const;

  /// Prints the block/offset pair and allocation status; the result tells
  /// dump() whether to continue with one of the explainers below.
  bool explainBlockStatus();

  // One explainer per block classification.
  void explainSuperBlockOffset();
  void explainFpmBlockOffset();
  void explainBlockMapOffset();
  void explainStreamDirectoryOffset();
  void explainStreamOffset(uint32_t Stream);
  void explainUnknownBlock();

  // Keep this declaration order: the constructor's initializer for
  // OffsetInBlock depends on BlockIndex already being initialized.
  PDBFile &File;
  const uint64_t FileOffset;
  const uint64_t BlockIndex;
  const uint64_t OffsetInBlock;
  LinePrinter P;
};
} // namespace pdb
} // namespace llvm
#endif

View File

@ -16,6 +16,7 @@
#include "Analyze.h"
#include "BytesOutputStyle.h"
#include "DumpOutputStyle.h"
#include "ExplainOutputStyle.h"
#include "InputFile.h"
#include "LinePrinter.h"
#include "OutputStyle.h"
@ -111,6 +112,9 @@ cl::SubCommand
// "merge" subcommand.
cl::SubCommand MergeSubcommand("merge",
"Merge multiple PDBs into a single PDB");
// "explain" subcommand; its options are declared with
// cl::sub(ExplainSubcommand).
cl::SubCommand ExplainSubcommand("explain",
"Explain the meaning of a file offset");
// Option categories; the strings are the category titles.
cl::OptionCategory TypeCategory("Symbol Type Options");
cl::OptionCategory FilterCategory("Filtering and Sorting Options");
cl::OptionCategory OtherOptions("Other Options");
@ -605,6 +609,16 @@ cl::opt<std::string>
PdbOutputFile("pdb", cl::desc("the name of the PDB file to write"),
cl::sub(MergeSubcommand));
}
// Options of the "explain" subcommand.
namespace explain {
// The PDB file to inspect, given as a positional argument.
cl::list<std::string> InputFilename(cl::Positional,
                                    cl::desc("<input PDB file>"), cl::Required,
                                    cl::sub(ExplainSubcommand));

// The file offset to explain.  This is a scalar option and a single value is
// consumed per run, so it carries cl::Required only.  cl::Required and
// cl::OneOrMore are both occurrence-count flags; specifying both leaves only
// the later one in effect, which would let a repeated -offset be accepted
// while all but one value is dropped.
cl::opt<uint64_t> Offset("offset", cl::desc("The file offset to explain"),
                         cl::sub(ExplainSubcommand), cl::Required);
} // namespace explain
}
static ExitOnError ExitOnErr;
@ -1074,6 +1088,14 @@ static void mergePdbs() {
ExitOnErr(Builder.commit(OutFile));
}
static void explain() {
std::unique_ptr<IPDBSession> Session;
PDBFile &File = loadPDB(opts::explain::InputFilename.front(), Session);
auto O = llvm::make_unique<ExplainOutputStyle>(File, opts::explain::Offset);
ExitOnErr(O->dump());
}
static bool parseRange(StringRef Str,
Optional<opts::bytes::NumberRange> &Parsed) {
if (Str.empty())
@ -1248,6 +1270,8 @@ int main(int argc_, const char *argv_[]) {
exit(1);
}
mergePdbs();
} else if (opts::ExplainSubcommand) {
explain();
}
outs().flush();

View File

@ -189,6 +189,10 @@ extern llvm::cl::list<ModuleSubsection> DumpModuleSubsections;
extern llvm::cl::opt<bool> DumpModuleSyms;
} // namespace pdb2yaml
// Options of the "explain" subcommand (defined in llvm-pdbutil.cpp).
namespace explain {
// Positional input PDB file name(s).
extern llvm::cl::list<std::string> InputFilename;
// The file offset to explain (-offset).
extern llvm::cl::opt<uint64_t> Offset;
} // namespace explain
}
#endif