From 36cf37c4c134ab25b1b9cda01a4535d347814cb2 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Thu, 30 Jan 2020 13:10:48 -0800 Subject: [PATCH] [BOLT] Add initial bits for parsing MachO files Summary: Start adding initial bits for MachO, this diff contains some small preparations for finding functions inside a MachO binary, this will be done in the next diff. The concept of a section in the MachO world is quite different from ELF, nevertheless, for functions for now it more or less fits into the current picture (in BOLT), but things will diverge more significantly a bit later. (cherry picked from FBD19648161) --- bolt/src/BinarySection.h | 19 +++++---- bolt/src/CMakeLists.txt | 2 + bolt/src/MachORewriteInstance.cpp | 67 +++++++++++++++++++++++++++++++ bolt/src/MachORewriteInstance.h | 43 ++++++++++++++++++++ bolt/src/RewriteInstance.cpp | 41 +++---------------- bolt/src/RewriteInstance.h | 2 +- bolt/src/Utils.cpp | 50 +++++++++++++++++++++++ bolt/src/Utils.h | 35 ++++++++++++++++ bolt/src/llvm-bolt.cpp | 12 ++++++ 9 files changed, 226 insertions(+), 45 deletions(-) create mode 100644 bolt/src/MachORewriteInstance.cpp create mode 100644 bolt/src/MachORewriteInstance.h create mode 100644 bolt/src/Utils.cpp create mode 100644 bolt/src/Utils.h diff --git a/bolt/src/BinarySection.h b/bolt/src/BinarySection.h index c8f5b40de324..6d7a6b88c518 100644 --- a/bolt/src/BinarySection.h +++ b/bolt/src/BinarySection.h @@ -89,12 +89,13 @@ class BinarySection { } static StringRef getContents(SectionRef Section) { StringRef Contents; - if (ELFSectionRef(Section).getType() != ELF::SHT_NOBITS) { - if (auto EC = Section.getContents(Contents)) { - errs() << "BOLT-ERROR: cannot get section contents for " - << getName(Section) << ": " << EC.message() << ".\n"; - exit(1); - } + if (Section.getObject()->isELF() && ELFSectionRef(Section).getType() == ELF::SHT_NOBITS) + return Contents; + + if (auto EC = Section.getContents(Contents)) { + errs() << "BOLT-ERROR: cannot get section 
contents for " + << getName(Section) << ": " << EC.message() << ".\n"; + exit(1); } return Contents; } @@ -152,10 +153,12 @@ public: Address(Section.getAddress()), Size(Section.getSize()), Alignment(Section.getAlignment()), - ELFType(ELFSectionRef(Section).getType()), - ELFFlags(ELFSectionRef(Section).getFlags()), IsLocal(IsLocal || StringRef(Name).startswith(".local.")), OutputName(Name) { + if (Section.getObject()->isELF()) { + ELFType = ELFSectionRef(Section).getType(); + ELFFlags = ELFSectionRef(Section).getFlags(); + } } // TODO: pass Data as StringRef/ArrayRef? use StringRef::copy method. diff --git a/bolt/src/CMakeLists.txt b/bolt/src/CMakeLists.txt index 34b846a40426..8088e9fae2c9 100644 --- a/bolt/src/CMakeLists.txt +++ b/bolt/src/CMakeLists.txt @@ -92,12 +92,14 @@ add_llvm_tool(llvm-bolt ExecutableFileMemoryManager.cpp Heatmap.cpp JumpTable.cpp + MachORewriteInstance.cpp MCPlusBuilder.cpp ParallelUtilities.cpp ProfileReader.cpp ProfileWriter.cpp Relocation.cpp RewriteInstance.cpp + Utils.cpp DEPENDS intrinsics_gen diff --git a/bolt/src/MachORewriteInstance.cpp b/bolt/src/MachORewriteInstance.cpp new file mode 100644 index 000000000000..d23cce627624 --- /dev/null +++ b/bolt/src/MachORewriteInstance.cpp @@ -0,0 +1,67 @@ +//===--- MachORewriteInstance.cpp - Instance of a rewriting process. ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "MachORewriteInstance.h" +#include "BinaryContext.h" +#include "BinaryFunction.h" +#include "Utils.h" +#include "llvm/Support/Timer.h" + +namespace opts { + +using namespace llvm; +extern cl::opt<bool> PrintSections; + +} // namespace opts + +namespace llvm { +namespace bolt { + +#undef DEBUG_TYPE +#define DEBUG_TYPE "bolt" + +MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile, + DataReader &DR) + : InputFile(InputFile), + BC(BinaryContext::createBinaryContext( + InputFile, DR, + DWARFContext::create(*InputFile, nullptr, + DWARFContext::defaultErrorHandler, "", false))) { +} + +void MachORewriteInstance::readSpecialSections() { + for (const auto &Section : InputFile->sections()) { + StringRef SectionName; + check_error(Section.getName(SectionName), "cannot get section name"); + // Only register sections with names. + if (!SectionName.empty()) { + BC->registerSection(Section); + DEBUG(dbgs() << "BOLT-DEBUG: registering section " << SectionName + << " @ 0x" << Twine::utohexstr(Section.getAddress()) << ":0x" + << Twine::utohexstr(Section.getAddress() + Section.getSize()) + << "\n"); + } + } + + if (opts::PrintSections) { + outs() << "BOLT-INFO: Sections from original binary:\n"; + BC->printSections(outs()); + } +} + +void MachORewriteInstance::run() { + readSpecialSections(); +} + +MachORewriteInstance::~MachORewriteInstance() {} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/src/MachORewriteInstance.h b/bolt/src/MachORewriteInstance.h new file mode 100644 index 000000000000..9c1a192192ab --- /dev/null +++ b/bolt/src/MachORewriteInstance.h @@ -0,0 +1,43 @@ +//===--- MachORewriteInstance.h - Instance of a rewriting process. 
--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface to control an instance of a macho binary rewriting process. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_MACHO_REWRITE_INSTANCE_H +#define LLVM_TOOLS_LLVM_BOLT_MACHO_REWRITE_INSTANCE_H + +#include "llvm/Object/MachO.h" +#include <memory> + +namespace llvm { +namespace bolt { + +class BinaryContext; +class DataReader; + +class MachORewriteInstance { + object::MachOObjectFile *InputFile; + std::unique_ptr<BinaryContext> BC; + + void readSpecialSections(); + +public: + MachORewriteInstance(object::MachOObjectFile *InputFile, DataReader &DR); + ~MachORewriteInstance(); + + /// Run all the necessary steps to read, optimize and rewrite the binary. + void run(); +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/src/RewriteInstance.cpp b/bolt/src/RewriteInstance.cpp index fd8efbacbb27..7cc77a7f6455 100644 --- a/bolt/src/RewriteInstance.cpp +++ b/bolt/src/RewriteInstance.cpp @@ -27,6 +27,7 @@ #include "ProfileReader.h" #include "ProfileWriter.h" #include "Relocation.h" +#include "Utils.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -292,12 +293,7 @@ PrintGlobals("print-globals", cl::Hidden, cl::cat(BoltCategory)); -static cl::opt<bool> -PrintSections("print-sections", - cl::desc("print all registered sections"), - cl::ZeroOrMore, - cl::Hidden, - cl::cat(BoltCategory)); +extern cl::opt<bool> PrintSections; static cl::opt<bool> PrintLoopInfo("print-loops", @@ -539,38 +535,11 @@ const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes"; namespace llvm { namespace bolt { + extern const char *BoltRevision; -void report_error(StringRef Message, std::error_code EC) { - assert(EC); - 
errs() << "BOLT-ERROR: '" << Message << "': " << EC.message() << ".\n"; - exit(1); -} - -void report_error(StringRef Message, Error E) { - assert(E); - errs() << "BOLT-ERROR: '" << Message << "': " << toString(std::move(E)) - << ".\n"; - exit(1); -} - -void check_error(std::error_code EC, StringRef Message) { - if (!EC) - return; - report_error(Message, EC); -} - -void check_error(Error E, Twine Message) { - if (!E) - return; - handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) { - llvm::errs() << "BOLT-ERROR: '" << Message << "': " << EIB.message() - << '\n'; - exit(1); - }); -} -} -} +} // namespace bolt +} // namespace llvm namespace { diff --git a/bolt/src/RewriteInstance.h b/bolt/src/RewriteInstance.h index 9f7ab5ec7fe3..36501fe1e87c 100644 --- a/bolt/src/RewriteInstance.h +++ b/bolt/src/RewriteInstance.h @@ -1,4 +1,4 @@ -//===--- RewriteInstance.h - Interface for machine-level function ---------===// +//===--- RewriteInstance.h - Instance of a rewriting process. -------------===// // // The LLVM Compiler Infrastructure // diff --git a/bolt/src/Utils.cpp b/bolt/src/Utils.cpp new file mode 100644 index 000000000000..b91e65304284 --- /dev/null +++ b/bolt/src/Utils.cpp @@ -0,0 +1,50 @@ +//===--- Utils.cpp - Common helper functions ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common helper functions. 
+// +//===----------------------------------------------------------------------===// + +#include "Utils.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace bolt { + +void report_error(StringRef Message, std::error_code EC) { + assert(EC); + errs() << "BOLT-ERROR: '" << Message << "': " << EC.message() << ".\n"; + exit(1); +} + +void report_error(StringRef Message, Error E) { + assert(E); + errs() << "BOLT-ERROR: '" << Message << "': " << toString(std::move(E)) + << ".\n"; + exit(1); +} + +void check_error(std::error_code EC, StringRef Message) { + if (!EC) + return; + report_error(Message, EC); +} + +void check_error(Error E, Twine Message) { + if (!E) + return; + handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) { + llvm::errs() << "BOLT-ERROR: '" << Message << "': " << EIB.message() + << '\n'; + exit(1); + }); +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/src/Utils.h b/bolt/src/Utils.h new file mode 100644 index 000000000000..4d2b921c32bc --- /dev/null +++ b/bolt/src/Utils.h @@ -0,0 +1,35 @@ +//===--- Utils.h - Common helper functions --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common helper functions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_UTILS_H +#define LLVM_TOOLS_LLVM_BOLT_UTILS_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace bolt { + +void report_error(StringRef Message, std::error_code EC); + +void report_error(StringRef Message, Error E); + +void check_error(std::error_code EC, StringRef Message); + +void check_error(Error E, Twine Message); + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/src/llvm-bolt.cpp b/bolt/src/llvm-bolt.cpp index 8ecd79217aa1..6cbac9d50b1e 100644 --- a/bolt/src/llvm-bolt.cpp +++ b/bolt/src/llvm-bolt.cpp @@ -15,6 +15,7 @@ #include "DataAggregator.h" #include "DataReader.h" +#include "MachORewriteInstance.h" #include "RewriteInstance.h" #include "llvm/Object/Binary.h" #include "llvm/Support/CommandLine.h" @@ -106,7 +107,15 @@ PerfDataA("p", cl::aliasopt(PerfData), cl::cat(AggregatorCategory)); +cl::opt<bool> + PrintSections("print-sections", + cl::desc("print all registered sections"), + cl::ZeroOrMore, + cl::Hidden, + cl::cat(BoltCategory)); + } // namespace opts + static StringRef ToolName; static void report_error(StringRef Message, std::error_code EC) { @@ -324,6 +333,9 @@ int main(int argc, char **argv) { if (auto *e = dyn_cast<ELFObjectFileBase>(&Binary)) { RewriteInstance RI(e, *DR.get(), *DA.get(), argc, argv, ToolPath); RI.run(); + } else if (auto *O = dyn_cast<MachOObjectFile>(&Binary)) { + MachORewriteInstance MachORI(O, *DR); + MachORI.run(); } else { report_error(opts::InputFilename, object_error::invalid_file_type); }