llvm-project/llvm/tools/sancov/sancov.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1198 lines
39 KiB
C++
Raw Normal View History

//===-- sancov.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file is a command-line tool for reading and analyzing sanitizer
// coverage.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <set>
#include <vector>
using namespace llvm;
namespace {
// --------- COMMAND LINE FLAGS ---------
// The operations the tool can perform. Exactly one of these is selected
// through the required `Action` command-line option.
enum ActionType {
CoveredFunctionsAction,
HtmlReportAction,
MergeAction,
NotCoveredFunctionsAction,
PrintAction,
PrintCovPointsAction,
StatsAction,
SymbolizeAction
};
// Required option selecting which action to run. Each enumerator of
// ActionType is exposed as its own flag name together with the help text
// printed by -help.
cl::opt<ActionType> Action(
    cl::desc("Action (required)"), cl::Required,
    cl::values(
        clEnumValN(PrintAction, "print", "Print coverage addresses"),
        clEnumValN(PrintCovPointsAction, "print-coverage-pcs",
                   "Print coverage instrumentation points addresses."),
        clEnumValN(CoveredFunctionsAction, "covered-functions",
                   "Print all covered functions."),
        clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
                   "Print all not covered functions."),
        clEnumValN(StatsAction, "print-coverage-stats",
                   "Print coverage statistics."),
        clEnumValN(HtmlReportAction, "html-report",
                   "REMOVED. Use -symbolize & coverage-report-server.py."),
        clEnumValN(SymbolizeAction, "symbolize",
                   "Produces a symbolized JSON report from binary report."),
        clEnumValN(MergeAction, "merge", "Merges reports.")));
// Positional arguments: at least one input file is required.
static cl::list<std::string>
ClInputFiles(cl::Positional, cl::OneOrMore,
cl::desc("<action> <binary files...> <.sancov files...> "
"<.symcov files...>"));
// -demangle (default: true) is forwarded to the symbolizer options.
static cl::opt<bool> ClDemangle("demangle", cl::init(true),
cl::desc("Print demangled function name."));
// -skip-dead-files (default: true).
static cl::opt<bool>
ClSkipDeadFiles("skip-dead-files", cl::init(true),
cl::desc("Do not list dead source files in reports."));
// -strip_path_prefix (default: empty, i.e. no stripping).
static cl::opt<std::string> ClStripPathPrefix(
"strip_path_prefix", cl::init(""),
cl::desc("Strip this prefix from file paths in reports."));
// -blacklist: optional path of a user-supplied blacklist file.
static cl::opt<std::string>
ClBlacklist("blacklist", cl::init(""),
cl::desc("Blacklist file (sanitizer blacklist format)."));
// Hidden switch that allows disabling the built-in blacklist below.
static cl::opt<bool> ClUseDefaultBlacklist(
"use_default_blacklist", cl::init(true), cl::Hidden,
cl::desc("Controls if default blacklist should be used."));
// Built-in blacklist text: excludes __sanitizer_* functions and sources under
// /usr/include or a libc++ directory.
static const char *const DefaultBlacklistStr = "fun:__sanitizer_.*\n"
"src:/usr/include/.*\n"
"src:.*/libc\\+\\+/.*\n";
// --------- FORMAT SPECIFICATION ---------
// Header found at the very start of a binary .sancov file: two 32-bit words.
// The file contents are reinterpreted as this struct, so the member order
// must match the on-disk order.
struct FileHeader {
// Width marker for the addresses that follow (Bitness32 or Bitness64).
uint32_t Bitness;
// Expected to equal BinCoverageMagic.
uint32_t Magic;
};
// Value FileHeader::Magic must have for a file to be accepted as raw coverage.
static const uint32_t BinCoverageMagic = 0xC0BFFFFF;
// FileHeader::Bitness values: addresses are stored as 32-bit or 64-bit
// integers respectively.
static const uint32_t Bitness32 = 0xFFFFFF32;
static const uint32_t Bitness64 = 0xFFFFFF64;
// File-name patterns used to classify inputs: "<anything>.<digits>.sancov"
// for raw coverage and "<anything>.symcov" for symbolized coverage.
static const Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
static const Regex SymcovFileRegex(".*\\.symcov");
// --------- MAIN DATASTRUCTURES ----------
// Contents of a .sancov file: the list of coverage point addresses that were
// executed.
struct RawCoverage {
// Takes ownership of the given address set.
explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs)
: Addrs(std::move(Addrs)) {}
// Read binary .sancov file.
// Returns an error code if the file cannot be opened or its header is not
// recognized.
static ErrorOr<std::unique_ptr<RawCoverage>>
read(const std::string &FileName);
// Executed coverage point addresses, sorted and de-duplicated by the set.
std::unique_ptr<std::set<uint64_t>> Addrs;
};
// A coverage point has an opaque Id and corresponds to one or more source
// locations.
struct CoveragePoint {
explicit CoveragePoint(const std::string &Id) : Id(Id) {}
// Opaque identifier; elsewhere in this file it is the hex-formatted address.
std::string Id;
// Source locations attributed to this point.
SmallVector<DILineInfo, 1> Locs;
};
// Symcov file content: the set of covered Ids plus information about all
// available coverage points.
struct SymbolizedCoverage {
// Read json .symcov file.
// Malformed input terminates the process through the fail* helpers.
static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile);
// Ids of the points that were executed.
std::set<std::string> CoveredIds;
// Hash string identifying the binary the coverage belongs to.
std::string BinaryHash;
// Every known coverage point, covered or not.
std::vector<CoveragePoint> Points;
};
// Summary counters printed by the CoverageStats stream operator.
struct CoverageStats {
// Printed as "all-edges".
size_t AllPoints;
// Printed as "cov-edges".
size_t CovPoints;
// Printed as "all-functions".
size_t AllFns;
// Printed as "cov-functions".
size_t CovFns;
};
// --------- ERROR HANDLING ---------
// Print "ERROR: <message>" to stderr and terminate with exit status 1.
static void fail(const llvm::Twine &E) {
  raw_ostream &Out = errs();
  Out << "ERROR: ";
  Out << E;
  Out << "\n";
  exit(1);
}
// Abort with message E when condition B holds; otherwise do nothing.
static void failIf(bool B, const llvm::Twine &E) {
  if (!B)
    return;
  fail(E);
}
// Abort with the error's message and numeric value if Error is set.
static void failIfError(std::error_code Error) {
  if (Error) {
    errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n";
    exit(1);
  }
}
template <typename T> static void failIfError(const ErrorOr<T> &E) {
failIfError(E.getError());
}
// Abort after logging the error if Err is a failure; a success value is
// consumed silently.
static void failIfError(Error Err) {
  if (!Err)
    return;
  logAllUnhandledErrors(std::move(Err), errs(), "ERROR: ");
  exit(1);
}
// Abort if the Expected holds an error instead of a value.
template <typename T> static void failIfError(Expected<T> &E) {
  Error Pending = E.takeError();
  failIfError(std::move(Pending));
}
// Treat a non-empty message as a fatal error; an empty one means success.
static void failIfNotEmpty(const llvm::Twine &E) {
  if (!E.str().empty())
    fail(E);
}
// Abort with Message when Ptr does not own an object.
template <typename T>
static void failIfEmpty(const std::unique_ptr<T> &Ptr,
                        const std::string &Message) {
  if (!Ptr)
    fail(Message);
}
// ----------- Coverage I/O ----------
// Decode the byte range [Start, End) as a packed array of native-endian
// integers of type T and add every value to *Ints.
//
// Values are copied out with memcpy, so the buffer needs no particular
// alignment. Only whole elements are read: trailing bytes that do not form a
// complete T (e.g. a truncated file) are ignored instead of being read past
// the end of the buffer. An empty or inverted range adds nothing.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  if (End <= Start)
    return;
  const size_t Count = static_cast<size_t>(End - Start) / sizeof(T);
  const char *Cursor = Start;
  for (size_t I = 0; I < Count; ++I, Cursor += sizeof(T)) {
    T Value;
    std::memcpy(&Value, Cursor, sizeof(T));
    Ints->insert(static_cast<uint64_t>(Value));
  }
}
// Load a binary .sancov file. The file starts with an 8-byte FileHeader
// followed by a packed array of 32- or 64-bit addresses, as selected by the
// header. Returns an error code when the file cannot be read or the header is
// not recognized.
ErrorOr<std::unique_ptr<RawCoverage>>
RawCoverage::read(const std::string &FileName) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
      MemoryBuffer::getFile(FileName);
  if (std::error_code EC = FileOrErr.getError())
    return EC;
  std::unique_ptr<MemoryBuffer> Contents = std::move(FileOrErr.get());

  if (Contents->getBufferSize() < 8) {
    errs() << "File too small (<8): " << Contents->getBufferSize() << '\n';
    return make_error_code(errc::illegal_byte_sequence);
  }

  const char *const Begin = Contents->getBufferStart();
  const auto *Hdr = reinterpret_cast<const FileHeader *>(Begin);
  if (Hdr->Magic != BinCoverageMagic) {
    errs() << "Wrong magic: " << Hdr->Magic << '\n';
    return make_error_code(errc::illegal_byte_sequence);
  }

  // The address payload starts right after the 8-byte header.
  const char *const Payload = Begin + 8;
  const char *const Finish = Contents->getBufferEnd();
  auto Addrs = std::make_unique<std::set<uint64_t>>();
  if (Hdr->Bitness == Bitness64) {
    readInts<uint64_t>(Payload, Finish, Addrs.get());
  } else if (Hdr->Bitness == Bitness32) {
    readInts<uint32_t>(Payload, Finish, Addrs.get());
  } else {
    errs() << "Unsupported bitness: " << Hdr->Bitness << '\n';
    return make_error_code(errc::illegal_byte_sequence);
  }

  // Zero slots are ignored so that a runtime implementation is not required
  // to compact the data.
  Addrs->erase(0);
  return std::make_unique<RawCoverage>(std::move(Addrs));
}
// Print every coverage address as "0x<hex>", one per line.
raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
  for (uint64_t PC : *CoverageData.Addrs)
    (OS << "0x").write_hex(PC) << "\n";
  return OS;
}
// Print the four summary counters, one "name: value" pair per line.
static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
  return OS << "all-edges: " << Stats.AllPoints << "\n"
            << "cov-edges: " << Stats.CovPoints << "\n"
            << "all-functions: " << Stats.AllFns << "\n"
            << "cov-functions: " << Stats.CovFns << "\n";
}
// Output symbolized information for coverage points in JSON.
// Format:
// {
//   '<file_name>' : {
//     '<function_name>' : {
//       '<point_id>' : '<line_number>:<column_number>',
//       ....
//     }
//   }
// }
// Each point id is written at most once per (file, function) pair, using the
// first of its locations that matches that pair.
static void operator<<(json::OStream &W,
                       const std::vector<CoveragePoint> &Points) {
  using PointList = std::vector<const CoveragePoint *>;

  // Bucket the points under every file mentioned by one of their locations.
  std::map<std::string, PointList> ByFile;
  for (const CoveragePoint &Pt : Points)
    for (const DILineInfo &Loc : Pt.Locs)
      ByFile[Loc.FileName].push_back(&Pt);

  for (const auto &FileEntry : ByFile) {
    const std::string &FileName = FileEntry.first;

    // Within one file, bucket the points by function name.
    std::map<std::string, PointList> ByFn;
    for (const CoveragePoint *Pt : FileEntry.second)
      for (const DILineInfo &Loc : Pt->Locs)
        ByFn[Loc.FunctionName].push_back(Pt);

    W.attributeObject(FileName, [&] {
      for (const auto &FnEntry : ByFn) {
        const std::string &FunctionName = FnEntry.first;
        W.attributeObject(FunctionName, [&] {
          std::set<std::string> Emitted;
          for (const CoveragePoint *Pt : FnEntry.second) {
            for (const DILineInfo &Loc : Pt->Locs) {
              const bool SameScope = Loc.FileName == FileName &&
                                     Loc.FunctionName == FunctionName;
              if (!SameScope)
                continue;
              // insert() reports whether the id was new; skip repeats.
              if (!Emitted.insert(Pt->Id).second)
                continue;
              // Output <point_id> : "<line>:<col>".
              W.attribute(Pt->Id,
                          (utostr(Loc.Line) + ":" + utostr(Loc.Column)));
            }
          }
        });
      }
    });
  }
}
// Serialize a SymbolizedCoverage as one JSON object with the keys
// "covered-points", "binary-hash" and "point-symbol-info".
static void operator<<(json::OStream &W, const SymbolizedCoverage &C) {
  auto WriteCoveredIds = [&] {
    for (const std::string &Id : C.CoveredIds)
      W.value(Id);
  };
  auto WritePointInfo = [&] { W << C.Points; };

  W.object([&] {
    W.attributeArray("covered-points", WriteCoveredIds);
    W.attribute("binary-hash", C.BinaryHash);
    W.attributeObject("point-symbol-info", WritePointInfo);
  });
}
// Return the text of a YAML scalar node; abort with "expected string" if the
// node is of any other kind.
static std::string parseScalarString(yaml::Node *N) {
  auto *Scalar = dyn_cast<yaml::ScalarNode>(N);
  failIf(Scalar == nullptr, "expected string");
  SmallString<64> Scratch;
  StringRef Text = Scalar->getValue(Scratch);
  return Text.str();
}
std::unique_ptr<SymbolizedCoverage>
SymbolizedCoverage::read(const std::string &InputFile) {
auto Coverage(std::make_unique<SymbolizedCoverage>());
std::map<std::string, CoveragePoint> Points;
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
MemoryBuffer::getFile(InputFile);
failIfError(BufOrErr);
SourceMgr SM;
yaml::Stream S(**BufOrErr, SM);
yaml::document_iterator DI = S.begin();
failIf(DI == S.end(), "empty document: " + InputFile);
yaml::Node *Root = DI->getRoot();
failIf(!Root, "expecting root node: " + InputFile);
yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Root);
failIf(!Top, "expecting mapping node: " + InputFile);
for (auto &KVNode : *Top) {
auto Key = parseScalarString(KVNode.getKey());
if (Key == "covered-points") {
yaml::SequenceNode *Points =
dyn_cast<yaml::SequenceNode>(KVNode.getValue());
failIf(!Points, "expected array: " + InputFile);
for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
Coverage->CoveredIds.insert(parseScalarString(&*I));
}
} else if (Key == "binary-hash") {
Coverage->BinaryHash = parseScalarString(KVNode.getValue());
} else if (Key == "point-symbol-info") {
yaml::MappingNode *PointSymbolInfo =
dyn_cast<yaml::MappingNode>(KVNode.getValue());
failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
for (auto &FileKVNode : *PointSymbolInfo) {
auto Filename = parseScalarString(FileKVNode.getKey());
yaml::MappingNode *FileInfo =
dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
failIf(!FileInfo, "expected mapping node: " + InputFile);
for (auto &FunctionKVNode : *FileInfo) {
auto FunctionName = parseScalarString(FunctionKVNode.getKey());
yaml::MappingNode *FunctionInfo =
dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
failIf(!FunctionInfo, "expected mapping node: " + InputFile);
for (auto &PointKVNode : *FunctionInfo) {
auto PointId = parseScalarString(PointKVNode.getKey());
auto Loc = parseScalarString(PointKVNode.getValue());
size_t ColonPos = Loc.find(':');
failIf(ColonPos == std::string::npos, "expected ':': " + InputFile);
auto LineStr = Loc.substr(0, ColonPos);
auto ColStr = Loc.substr(ColonPos + 1, Loc.size());
if (Points.find(PointId) == Points.end())
Points.insert(std::make_pair(PointId, CoveragePoint(PointId)));
DILineInfo LineInfo;
LineInfo.FileName = Filename;
LineInfo.FunctionName = FunctionName;
char *End;
LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10);
LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10);
CoveragePoint *CoveragePoint = &Points.find(PointId)->second;
CoveragePoint->Locs.push_back(LineInfo);
}
}
}
} else {
errs() << "Ignoring unknown key: " << Key << "\n";
}
}
for (auto &KV : Points) {
Coverage->Points.push_back(KV.second);
}
return Coverage;
}
// ---------- MAIN FUNCTIONALITY ----------
// Drop everything up to and including the first occurrence of the
// -strip_path_prefix value. The path is returned unchanged when the option is
// empty or the value does not occur in the path.
std::string stripPathPrefix(std::string Path) {
  if (!ClStripPathPrefix.empty()) {
    const size_t Found = Path.find(ClStripPathPrefix);
    if (Found != std::string::npos)
      return Path.substr(Found + ClStripPathPrefix.size());
  }
  return Path;
}
static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
symbolize::LLVMSymbolizer::Options SymbolizerOptions;
SymbolizerOptions.Demangle = ClDemangle;
SymbolizerOptions.UseSymbolTable = true;
return std::unique_ptr<symbolize::LLVMSymbolizer>(
new symbolize::LLVMSymbolizer(SymbolizerOptions));
}
static std::string normalizeFilename(const std::string &FileName) {
SmallString<256> S(FileName);
sys::path::remove_dots(S, /* remove_dot_dot */ true);
return stripPathPrefix(S.str().str());
}
class Blacklists {
public:
Blacklists()
: DefaultBlacklist(createDefaultBlacklist()),
UserBlacklist(createUserBlacklist()) {}
bool isBlacklisted(const DILineInfo &I) {
if (DefaultBlacklist &&
DefaultBlacklist->inSection("sancov", "fun", I.FunctionName))
return true;
if (DefaultBlacklist &&
DefaultBlacklist->inSection("sancov", "src", I.FileName))
return true;
if (UserBlacklist &&
UserBlacklist->inSection("sancov", "fun", I.FunctionName))
return true;
if (UserBlacklist && UserBlacklist->inSection("sancov", "src", I.FileName))
return true;
return false;
}
private:
static std::unique_ptr<SpecialCaseList> createDefaultBlacklist() {
if (!ClUseDefaultBlacklist)
return std::unique_ptr<SpecialCaseList>();
std::unique_ptr<MemoryBuffer> MB =
MemoryBuffer::getMemBuffer(DefaultBlacklistStr);
std::string Error;
auto Blacklist = SpecialCaseList::create(MB.get(), Error);
failIfNotEmpty(Error);
return Blacklist;
}
static std::unique_ptr<SpecialCaseList> createUserBlacklist() {
if (ClBlacklist.empty())
return std::unique_ptr<SpecialCaseList>();
return SpecialCaseList::createOrDie({{ClBlacklist}},
*vfs::getRealFileSystem());
}
std::unique_ptr<SpecialCaseList> DefaultBlacklist;
std::unique_ptr<SpecialCaseList> UserBlacklist;
};
static std::vector<CoveragePoint>
getCoveragePoints(const std::string &ObjectFile,
const std::set<uint64_t> &Addrs,
const std::set<uint64_t> &CoveredAddrs) {
std::vector<CoveragePoint> Result;
auto Symbolizer(createSymbolizer());
Blacklists B;
std::set<std::string> CoveredFiles;
if (ClSkipDeadFiles) {
for (auto Addr : CoveredAddrs) {
// TODO: it would be neccessary to set proper section index here.
// object::SectionedAddress::UndefSection works for only absolute
// addresses.
object::SectionedAddress ModuleAddress = {
Addr, object::SectionedAddress::UndefSection};
auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
failIfError(LineInfo);
CoveredFiles.insert(LineInfo->FileName);
auto InliningInfo =
Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
failIfError(InliningInfo);
for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
auto FrameInfo = InliningInfo->getFrame(I);
CoveredFiles.insert(FrameInfo.FileName);
}
}
}
for (auto Addr : Addrs) {
std::set<DILineInfo> Infos; // deduplicate debug info.
// TODO: it would be neccessary to set proper section index here.
// object::SectionedAddress::UndefSection works for only absolute addresses.
object::SectionedAddress ModuleAddress = {
Addr, object::SectionedAddress::UndefSection};
auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
failIfError(LineInfo);
if (ClSkipDeadFiles &&
CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end())
continue;
LineInfo->FileName = normalizeFilename(LineInfo->FileName);
if (B.isBlacklisted(*LineInfo))
continue;
auto Id = utohexstr(Addr, true);
auto Point = CoveragePoint(Id);
Infos.insert(*LineInfo);
Point.Locs.push_back(*LineInfo);
auto InliningInfo =
Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
failIfError(InliningInfo);
for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
auto FrameInfo = InliningInfo->getFrame(I);
if (ClSkipDeadFiles &&
CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end())
continue;
FrameInfo.FileName = normalizeFilename(FrameInfo.FileName);
if (B.isBlacklisted(FrameInfo))
continue;
if (Infos.find(FrameInfo) == Infos.end()) {
Infos.insert(FrameInfo);
Point.Locs.push_back(FrameInfo);
}
}
Result.push_back(Point);
}
return Result;
}
// True if Name is one of the sanitizer coverage callbacks, either spelled
// plainly or with one additional leading underscore (Mac has '___' prefix).
static bool isCoveragePointSymbol(StringRef Name) {
  static const char *const Callbacks[] = {
      "__sanitizer_cov",
      "__sanitizer_cov_with_check",
      "__sanitizer_cov_trace_func_enter",
      "__sanitizer_cov_trace_pc_guard",
  };
  const bool HasExtraUnderscore = !Name.empty() && Name[0] == '_';
  for (const char *Callback : Callbacks) {
    if (Name == Callback)
      return true;
    if (HasExtraUnderscore && Name.substr(1) == Callback)
      return true;
  }
  return false;
}
// Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
//
// For every S_SYMBOL_STUBS section of each 64-bit segment, the section's
// reserved1 field is used as the first index into the indirect symbol table
// and reserved2 as the size of one stub. The address of each stub whose
// indirect symbol is a coverage callback is added to *Result.
//
// A stub section reporting a zero stub size is skipped, since it cannot be
// walked. Address and index arithmetic is done in 64 bits so that large
// values do not wrap. 32-bit segments are only reported on stderr.
static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O,
                                          std::set<uint64_t> *Result) {
  MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand();
  MachO::symtab_command Symtab = O.getSymtabLoadCommand();

  for (const auto &Load : O.load_commands()) {
    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
      MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load);
      for (unsigned SecIdx = 0; SecIdx < Seg.nsects; ++SecIdx) {
        MachO::section_64 Sec = O.getSection64(Load, SecIdx);
        uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE;
        if (SectionType != MachO::S_SYMBOL_STUBS)
          continue;

        uint32_t Stride = Sec.reserved2;
        if (Stride == 0)
          continue; // Malformed section; avoid dividing by zero.
        uint32_t Cnt = Sec.size / Stride;
        uint32_t N = Sec.reserved1;

        for (uint32_t StubIdx = 0;
             StubIdx < Cnt &&
             static_cast<uint64_t>(N) + StubIdx < Dysymtab.nindirectsyms;
             ++StubIdx) {
          uint32_t IndirectSymbol =
              O.getIndirectSymbolTableEntry(Dysymtab, N + StubIdx);
          uint64_t Addr = Sec.addr + static_cast<uint64_t>(StubIdx) * Stride;
          // Entries that do not index the symbol table are ignored.
          if (IndirectSymbol >= Symtab.nsyms)
            continue;
          object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol));
          Expected<StringRef> Name = Symbol.getName();
          failIfError(Name);
          if (isCoveragePointSymbol(Name.get()))
            Result->insert(Addr);
        }
      }
    }
    if (Load.C.cmd == MachO::LC_SEGMENT) {
      errs() << "ERROR: 32 bit MachO binaries not supported\n";
    }
  }
}
// Locate __sanitizer_cov* function addresses that are used for coverage
// reporting.
static std::set<uint64_t>
findSanitizerCovFunctions(const object::ObjectFile &O) {
std::set<uint64_t> Result;
for (const object::SymbolRef &Symbol : O.symbols()) {
Expected<uint64_t> AddressOrErr = Symbol.getAddress();
failIfError(AddressOrErr);
uint64_t Address = AddressOrErr.get();
Thread Expected<...> up from libObject’s getName() for symbols to allow llvm-objdump to produce a good error message. Produce another specific error message for a malformed Mach-O file when a symbol’s string index is past the end of the string table. The existing test case in test/Object/macho-invalid.test for macho-invalid-symbol-name-past-eof now reports the error with the message indicating that a symbol at a specific index has a bad sting index and that bad string index value. Again converting interfaces to Expected<> from ErrorOr<> does involve touching a number of places. Where the existing code reported the error with a string message or an error code it was converted to do the same. There is some code for this that could be factored into a routine but I would like to leave that for the code owners post-commit to do as they want for handling an llvm::Error. An example of how this could be done is shown in the diff in lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h which had a Check() routine already for std::error_code so I added one like it for llvm::Error . Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values.  So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(NameOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there fixes needed to lld that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 266919
2016-04-21 05:24:34 +08:00
Expected<StringRef> NameOrErr = Symbol.getName();
failIfError(NameOrErr);
StringRef Name = NameOrErr.get();
if (!(Symbol.getFlags() & object::BasicSymbolRef::SF_Undefined) &&
isCoveragePointSymbol(Name)) {
Result.insert(Address);
}
}
if (const auto *CO = dyn_cast<object::COFFObjectFile>(&O)) {
for (const object::ExportDirectoryEntryRef &Export :
CO->export_directories()) {
uint32_t RVA;
std::error_code EC = Export.getExportRVA(RVA);
failIfError(EC);
StringRef Name;
EC = Export.getSymbolName(Name);
failIfError(EC);
if (isCoveragePointSymbol(Name))
Result.insert(CO->getImageBase() + RVA);
}
}
if (const auto *MO = dyn_cast<object::MachOObjectFile>(&O)) {
findMachOIndirectCovFunctions(*MO, &Result);
}
return Result;
}
// Map the address following a call to the value used as the coverage point
// id. The adjustment depends on the architecture of TheTriple.
static uint64_t getPreviousInstructionPc(uint64_t PC,
                                         Triple TheTriple) {
  if (TheTriple.isARM())
    return (PC - 3) & (~1);
  if (TheTriple.isAArch64())
    return PC - 4;
  if (TheTriple.isMIPS())
    return PC - 8;
  return PC - 1;
}
// Locate addresses of all coverage points in a file. Coverage point
// is defined as the 'address of instruction following __sanitizer_cov
// call - 1'.
//
// Every text section of O is disassembled linearly. Each call instruction
// whose evaluated target is one of the __sanitizer_cov* functions contributes
// one address to *Addrs. The tool aborts if a required MC component cannot be
// created or no __sanitizer_cov* function is found.
static void getObjectCoveragePoints(const object::ObjectFile &O,
                                    std::set<uint64_t> *Addrs) {
  // Only the architecture is taken from the object file.
  Triple TheTriple("unknown-unknown-unknown");
  TheTriple.setArch(Triple::ArchType(O.getArch()));
  auto TripleName = TheTriple.getTriple();

  std::string LookupError;
  const Target *TheTarget =
      TargetRegistry::lookupTarget(TripleName, LookupError);
  failIfNotEmpty(LookupError);

  std::unique_ptr<const MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TripleName, "", ""));
  failIfEmpty(STI, "no subtarget info for target " + TripleName);

  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  failIfEmpty(MRI, "no register info for target " + TripleName);

  MCTargetOptions MCOptions;
  std::unique_ptr<const MCAsmInfo> AsmInfo(
      TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  failIfEmpty(AsmInfo, "no asm info for target " + TripleName);

  std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
  MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
  std::unique_ptr<MCDisassembler> DisAsm(
      TheTarget->createMCDisassembler(*STI, Ctx));
  failIfEmpty(DisAsm, "no disassembler info for target " + TripleName);

  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
  failIfEmpty(MII, "no instruction info for target " + TripleName);

  std::unique_ptr<const MCInstrAnalysis> MIA(
      TheTarget->createMCInstrAnalysis(MII.get()));
  failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);

  auto SanCovAddrs = findSanitizerCovFunctions(O);
  if (SanCovAddrs.empty())
    fail("__sanitizer_cov* functions not found");

  for (object::SectionRef Section : O.sections()) {
    if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
      continue;
    uint64_t SectionAddr = Section.getAddress();
    uint64_t SectSize = Section.getSize();
    if (!SectSize)
      continue;

    Expected<StringRef> BytesStr = Section.getContents();
    failIfError(BytesStr);
    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*BytesStr);

    uint64_t InstSize = 0;
    for (uint64_t Offset = 0; Offset < Section.getSize(); Offset += InstSize) {
      const uint64_t InstAddr = SectionAddr + Offset;
      MCInst Inst;
      const bool Decoded = DisAsm->getInstruction(
          Inst, InstSize, Bytes.slice(Offset), InstAddr, nulls());
      if (!Decoded) {
        // Always make progress, even over undecodable bytes.
        if (InstSize == 0)
          InstSize = 1;
        continue;
      }

      // Sanitizer coverage uses the address of the next instruction - 1.
      uint64_t CovPoint =
          getPreviousInstructionPc(InstAddr + InstSize, TheTriple);
      uint64_t Target;
      if (MIA->isCall(Inst) &&
          MIA->evaluateBranch(Inst, InstAddr, InstSize, Target) &&
          SanCovAddrs.count(Target) != 0)
        Addrs->insert(CovPoint);
    }
  }
}
// Invoke Fn on every member of archive A. A member that is not an object
// file, or any error while iterating, aborts the tool.
static void
visitObjectFiles(const object::Archive &A,
                 function_ref<void(const object::ObjectFile &)> Fn) {
  Error IterErr = Error::success();
  for (auto &Child : A.children(IterErr)) {
    Expected<std::unique_ptr<object::Binary>> MemberOrErr =
        Child.getAsBinary();
    failIfError(MemberOrErr);
    object::Binary *Member = MemberOrErr.get().get();
    auto *Obj = dyn_cast<object::ObjectFile>(Member);
    if (!Obj)
      failIfError(object::object_error::invalid_file_type);
    else
      Fn(*Obj);
  }
  failIfError(std::move(IterErr));
}
static void
visitObjectFiles(const std::string &FileName,
function_ref<void(const object::ObjectFile &)> Fn) {
Thread Expected<...> up from createMachOObjectFile() to allow llvm-objdump to produce a real error message Produce the first specific error message for a malformed Mach-O file describing the problem instead of the generic message for object_error::parse_failed of "Invalid data was encountered while parsing the file”.  Many more good error messages will follow after this first one. This is built on Lang Hames’ great work of adding the ’Error' class for structured error handling and threading Error through MachOObjectFile construction. And making createMachOObjectFile return Expected<...> . So to to get the error to the llvm-obdump tool, I changed the stack of these methods to also return Expected<...> : object::ObjectFile::createObjectFile() object::SymbolicFile::createSymbolicFile() object::createBinary() Then finally in ParseInputMachO() in MachODump.cpp the error can be reported and the specific error message can be printed in llvm-objdump and can be seen in the existing test case for the existing malformed binary but with the updated error message. Converting these interfaces to Expected<> from ErrorOr<> does involve touching a number of places. To contain the changes for now use of errorToErrorCode() and errorOrToExpected() are used where the callers are yet to be converted. Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values. So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(ObjOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there is one fix also needed to lld/COFF/InputFiles.cpp that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 265606
2016-04-07 06:14:09 +08:00
Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
object::createBinary(FileName);
Thread Expected<...> up from createMachOObjectFile() to allow llvm-objdump to produce a real error message Produce the first specific error message for a malformed Mach-O file describing the problem instead of the generic message for object_error::parse_failed of "Invalid data was encountered while parsing the file”.  Many more good error messages will follow after this first one. This is built on Lang Hames’ great work of adding the ’Error' class for structured error handling and threading Error through MachOObjectFile construction. And making createMachOObjectFile return Expected<...> . So to to get the error to the llvm-obdump tool, I changed the stack of these methods to also return Expected<...> : object::ObjectFile::createObjectFile() object::SymbolicFile::createSymbolicFile() object::createBinary() Then finally in ParseInputMachO() in MachODump.cpp the error can be reported and the specific error message can be printed in llvm-objdump and can be seen in the existing test case for the existing malformed binary but with the updated error message. Converting these interfaces to Expected<> from ErrorOr<> does involve touching a number of places. To contain the changes for now use of errorToErrorCode() and errorOrToExpected() are used where the callers are yet to be converted. Also there some were bugs in the existing code that did not deal with the old ErrorOr<> return values. So now with Expected<> since they must be checked and the error handled, I added a TODO and a comment: “// TODO: Actually report errors helpfully” and a call something like consumeError(ObjOrErr.takeError()) so the buggy code will not crash since needed to deal with the Error. Note there is one fix also needed to lld/COFF/InputFiles.cpp that goes along with this that I will commit right after this. So expect lld not to built after this commit and before the next one. llvm-svn: 265606
2016-04-07 06:14:09 +08:00
if (!BinaryOrErr)
failIfError(BinaryOrErr);
object::Binary &Binary = *BinaryOrErr.get().getBinary();
if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
visitObjectFiles(*A, Fn);
else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
Fn(*O);
else
failIfError(object::object_error::invalid_file_type);
}
static std::set<uint64_t>
findSanitizerCovFunctions(const std::string &FileName) {
std::set<uint64_t> Result;
visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
auto Addrs = findSanitizerCovFunctions(O);
Result.insert(Addrs.begin(), Addrs.end());
});
return Result;
}
// Locate addresses of all coverage points in a file. Coverage point
// is defined as the 'address of instruction following __sanitizer_cov
// call - 1'.
static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) {
std::set<uint64_t> Result;
visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
getObjectCoveragePoints(O, &Result);
});
return Result;
}
// Print every coverage point address of ObjFile as "0x<hex>", one per line.
static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
  const std::set<uint64_t> Points = findCoveragePointAddrs(ObjFile);
  for (uint64_t PC : Points)
    (OS << "0x").write_hex(PC) << "\n";
}
// Decide whether FileName is a raw coverage file: its base name must match
// the .sancov pattern and its header must carry the coverage magic. Returns
// the error (after printing a warning) if the file cannot be read.
static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
  StringRef BaseName = llvm::sys::path::filename(FileName);
  if (!SancovFileRegex.match(BaseName))
    return false;

  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
      MemoryBuffer::getFile(FileName);
  if (std::error_code EC = FileOrErr.getError()) {
    errs() << "Warning: " << EC.message() << "(" << EC.value()
           << "), filename: " << llvm::sys::path::filename(FileName) << "\n";
    return EC;
  }

  std::unique_ptr<MemoryBuffer> Contents = std::move(FileOrErr.get());
  // Too short to hold a header.
  if (Contents->getBufferSize() < 8)
    return false;

  const auto *Hdr =
      reinterpret_cast<const FileHeader *>(Contents->getBufferStart());
  return Hdr->Magic == BinCoverageMagic;
}
static bool isSymbolizedCoverageFile(const std::string &FileName) {
auto ShortFileName = llvm::sys::path::filename(FileName);
return SymcovFileRegex.match(ShortFileName);
}
// Symbolize the raw coverage in Data against the binary ObjectFile.
//
// The result carries:
//  * BinaryHash - hex encoded SHA1 of the contents of ObjectFile;
//  * CoveredIds - hex ids of the addresses in Data whose symbolized location
//                 is not blacklisted;
//  * Points     - the value returned by getCoveragePoints for the binary.
//
// Problems are reported through failIfError/fail: an unreadable binary, an
// address that cannot be symbolized, or addresses in Data that are not
// coverage points of the binary.
//
// ObjectFile is taken by const reference; it is only read here, so there is
// no need to copy the string on every call.
static std::unique_ptr<SymbolizedCoverage>
symbolize(const RawCoverage &Data, const std::string &ObjectFile) {
  auto Coverage = std::make_unique<SymbolizedCoverage>();

  // Hash the binary so the symbolized output can be tied back to it.
  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
      MemoryBuffer::getFile(ObjectFile);
  failIfError(BufOrErr);
  SHA1 Hasher;
  Hasher.update((*BufOrErr)->getBuffer());
  Coverage->BinaryHash = toHex(Hasher.final());

  Blacklists B;
  auto Symbolizer(createSymbolizer());

  for (uint64_t Addr : *Data.Addrs) {
    // TODO: it would be necessary to set proper section index here.
    // object::SectionedAddress::UndefSection works for only absolute addresses.
    auto LineInfo = Symbolizer->symbolizeCode(
        ObjectFile, {Addr, object::SectionedAddress::UndefSection});
    failIfError(LineInfo);
    // Blacklisted locations are not reported as covered.
    if (B.isBlacklisted(*LineInfo))
      continue;

    Coverage->CoveredIds.insert(utohexstr(Addr, true));
  }

  // Every recorded address must be a coverage point of the binary; otherwise
  // the .sancov file was produced by a different binary.
  std::set<uint64_t> AllAddrs = findCoveragePointAddrs(ObjectFile);
  if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(),
                     Data.Addrs->end())) {
    fail("Coverage points in binary and .sancov file do not match.");
  }
  Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs);
  return Coverage;
}
// Identifies a function by the file it is located in together with its name.
// Values are ordered lexicographically by (FileName, FunctionName), which
// lets them be used as keys of ordered containers.
struct FileFn {
  std::string FileName;
  std::string FunctionName;

  bool operator<(const FileFn &RHS) const {
    // The file name decides first; the function name only breaks ties.
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return FunctionName < RHS.FunctionName;
  }
};
static std::set<FileFn>
computeFunctions(const std::vector<CoveragePoint> &Points) {
std::set<FileFn> Fns;
for (const auto &Point : Points) {
for (const auto &Loc : Point.Locs) {
Fns.insert(FileFn{Loc.FileName, Loc.FunctionName});
}
}
return Fns;
}
static std::set<FileFn>
computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
auto Fns = computeFunctions(Coverage.Points);
for (const auto &Point : Coverage.Points) {
if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
continue;
for (const auto &Loc : Point.Locs) {
Fns.erase(FileFn{Loc.FileName, Loc.FunctionName});
}
}
return Fns;
}
// Compute the functions that have at least one covered point: every
// (file name, function name) pair found in a location of a point whose id is
// in Coverage.CoveredIds.
static std::set<FileFn>
computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
  std::set<FileFn> Result;
  for (const auto &Point : Coverage.Points) {
    // Points that were never hit contribute nothing.
    if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
      continue;

    for (const auto &Loc : Point.Locs) {
      Result.insert(FileFn{Loc.FileName, Loc.FunctionName});
    }
  }
  return Result;
}
typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs;
// finds first location in a file for each function.
static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage,
const std::set<FileFn> &Fns) {
FunctionLocs Result;
for (const auto &Point : Coverage.Points) {
for (const auto &Loc : Point.Locs) {
FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName};
if (Fns.find(Fn) == Fns.end())
continue;
auto P = std::make_pair(Loc.Line, Loc.Column);
auto I = Result.find(Fn);
if (I == Result.end() || I->second > P) {
Result[Fn] = P;
}
}
}
return Result;
}
static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) {
for (const auto &P : FnLocs) {
OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " "
<< P.first.FunctionName << "\n";
}
}
// Summarize Coverage as four counters, in this order: number of coverage
// points, number of covered ids, number of distinct functions, and number of
// functions with at least one covered point.
CoverageStats computeStats(const SymbolizedCoverage &Coverage) {
  const auto NumPoints = Coverage.Points.size();
  const auto NumCoveredIds = Coverage.CoveredIds.size();
  const auto NumFns = computeFunctions(Coverage.Points).size();
  const auto NumCoveredFns = computeCoveredFunctions(Coverage).size();
  CoverageStats Stats = {NumPoints, NumCoveredIds, NumFns, NumCoveredFns};
  return Stats;
}
// Print list of covered functions.
// Line format: <file_name>:<line> <function_name>
static void printCoveredFunctions(const SymbolizedCoverage &CovData,
raw_ostream &OS) {
auto CoveredFns = computeCoveredFunctions(CovData);
printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS);
}
// Print list of not covered functions.
// Line format: <file_name>:<line> <function_name>
static void printNotCoveredFunctions(const SymbolizedCoverage &CovData,
raw_ostream &OS) {
auto NotCoveredFns = computeNotCoveredFunctions(CovData);
printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS);
}
// Read list of files and merges their coverage info.
static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
raw_ostream &OS) {
std::vector<std::unique_ptr<RawCoverage>> Covs;
for (const auto &FileName : FileNames) {
auto Cov = RawCoverage::read(FileName);
if (!Cov)
continue;
OS << *Cov.get();
}
}
static std::unique_ptr<SymbolizedCoverage>
merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
if (Coverages.empty())
return nullptr;
auto Result = std::make_unique<SymbolizedCoverage>();
for (size_t I = 0; I < Coverages.size(); ++I) {
const SymbolizedCoverage &Coverage = *Coverages[I];
std::string Prefix;
if (Coverages.size() > 1) {
// prefix is not needed when there's only one file.
Prefix = utostr(I);
}
for (const auto &Id : Coverage.CoveredIds) {
Result->CoveredIds.insert(Prefix + Id);
}
for (const auto &CovPoint : Coverage.Points) {
CoveragePoint NewPoint(CovPoint);
NewPoint.Id = Prefix + CovPoint.Id;
Result->Points.push_back(NewPoint);
}
}
if (Coverages.size() == 1) {
Result->BinaryHash = Coverages[0]->BinaryHash;
}
return Result;
}
// Turn the command-line inputs into a single SymbolizedCoverage.
//
// Each input falls into exactly one of three groups:
//  * symbolized coverage files (isSymbolizedCoverageFile) are read directly;
//  * raw coverage files (isCoverageFile yields true) are collected;
//  * all remaining readable inputs are treated as object files.
// Inputs for which isCoverageFile reports an error are skipped.
//
// Every raw coverage file is then paired with the object file whose short
// name equals the first capture group of SancovFileRegex for the coverage
// file name, symbolized against it, and all results are merged. The return
// value is whatever merge() returns, i.e. nullptr when nothing was collected.
static std::unique_ptr<SymbolizedCoverage>
readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) {
  std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages;

  {
    // Short name => file name.
    std::map<std::string, std::string> ObjFiles;
    std::set<std::string> CovFiles;

    // Partition input values into coverage/object files.
    for (const auto &FileName : FileNames) {
      if (isSymbolizedCoverageFile(FileName)) {
        Coverages.push_back(SymbolizedCoverage::read(FileName));
        // The file is fully handled. Without this it would fall through into
        // the raw-coverage/object-file classification below and could be
        // registered as an object file.
        continue;
      }

      auto ErrorOrIsCoverage = isCoverageFile(FileName);
      if (!ErrorOrIsCoverage)
        continue;
      if (ErrorOrIsCoverage.get()) {
        CovFiles.insert(FileName);
      } else {
        auto ShortFileName = llvm::sys::path::filename(FileName);
        if (ObjFiles.find(std::string(ShortFileName)) != ObjFiles.end()) {
          fail("Duplicate binary file with a short name: " + ShortFileName);
        }

        ObjFiles[std::string(ShortFileName)] = FileName;
      }
    }

    SmallVector<StringRef, 2> Components;

    // Object file => list of corresponding coverage file names.
    std::map<std::string, std::vector<std::string>> CoverageByObjFile;
    for (const auto &FileName : CovFiles) {
      auto ShortFileName = llvm::sys::path::filename(FileName);
      auto Ok = SancovFileRegex.match(ShortFileName, &Components);
      if (!Ok) {
        fail("Can't match coverage file name against "
             "<module_name>.<pid>.sancov pattern: " +
             FileName);
      }

      // Components[1] is the first capture group of the match; it is used as
      // the short name of the object file this coverage belongs to.
      auto Iter = ObjFiles.find(std::string(Components[1]));
      if (Iter == ObjFiles.end()) {
        fail("Object file for coverage not found: " + FileName);
      }

      CoverageByObjFile[Iter->second].push_back(FileName);
    }

    // Object files without any coverage are only reported, not rejected.
    for (const auto &Pair : ObjFiles) {
      const std::string &FileName = Pair.second;
      if (CoverageByObjFile.find(FileName) == CoverageByObjFile.end())
        errs() << "WARNING: No coverage file for " << FileName << "\n";
    }

    // Read raw coverage and symbolize it.
    for (const auto &Pair : CoverageByObjFile) {
      if (findSanitizerCovFunctions(Pair.first).empty()) {
        errs()
            << "WARNING: Ignoring " << Pair.first
            << " and its coverage because __sanitizer_cov* functions were not "
               "found.\n";
        continue;
      }

      for (const std::string &CoverageFile : Pair.second) {
        auto DataOrError = RawCoverage::read(CoverageFile);
        failIfError(DataOrError);
        Coverages.push_back(symbolize(*DataOrError.get(), Pair.first));
      }
    }
  }

  return merge(Coverages);
}
} // namespace
// Entry point: parses the command line and dispatches on Action.
// Returns 0 on success and 1 for the removed html report action.
int main(int Argc, char **Argv) {
  // Print stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal(Argv[0]);
  PrettyStackTraceProgram X(Argc, Argv);
  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.

  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllDisassemblers();

  const char *const Overview =
      "Sanitizer Coverage Processing Tool (sancov)\n\n"
      " This tool can extract various coverage-related information from: \n"
      " coverage-instrumented binary files, raw .sancov files and their "
      "symbolized .symcov version.\n"
      " Depending on chosen action the tool expects different input files:\n"
      " -print-coverage-pcs - coverage-instrumented binary files\n"
      " -print-coverage - .sancov files\n"
      " <other actions> - .sancov files & corresponding binary "
      "files, .symcov files\n";
  cl::ParseCommandLineOptions(Argc, Argv, Overview);

  // Printing raw coverage works on the coverage files alone; no object files
  // are needed.
  if (Action == PrintAction) {
    readAndPrintRawCoverage(ClInputFiles, outs());
    return 0;
  }

  // Printing coverage points works on the object files alone; no coverage
  // files are needed.
  if (Action == PrintCovPointsAction) {
    for (const std::string &ObjFile : ClInputFiles)
      printCovPoints(ObjFile, outs());
    return 0;
  }

  // Every remaining action operates on the merged symbolized coverage.
  auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles);
  failIf(!Coverage, "No valid coverage files given.");

  switch (Action) {
  case CoveredFunctionsAction:
    printCoveredFunctions(*Coverage, outs());
    break;
  case NotCoveredFunctionsAction:
    printNotCoveredFunctions(*Coverage, outs());
    break;
  case StatsAction:
    outs() << computeStats(*Coverage);
    break;
  case MergeAction:
  case SymbolizeAction: { // merge & symbolize are synonyms.
    json::OStream W(outs(), 2);
    W << *Coverage;
    break;
  }
  case HtmlReportAction:
    errs() << "-html-report option is removed: "
              "use -symbolize & coverage-report-server.py instead\n";
    return 1;
  case PrintAction:
  case PrintCovPointsAction:
    // Both were handled and returned above.
    llvm_unreachable("unsupported action");
  }
  return 0;
}