[BOLT] Add option to verify instruction encoder/decoder

Summary:
Add option `-check-encoding` to verify if the input to LLVM disassembler
matches the output of the assembler. When set, the verification runs on
every instruction in processed functions.

I'm not enabling the option by default as it could be quite noisy on x86
where instruction encoding is ambiguous and can include redundant
prefixes.

(cherry picked from FBD16595415)
This commit is contained in:
Maksim Panchenko 2019-07-31 16:03:49 -07:00
parent 79ff4ec1cb
commit 8d5854ef09
3 changed files with 48 additions and 8 deletions

View File

@ -685,11 +685,9 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
<< " starting at offset "
<< (Offset - BF.getSize()) << " in function "
<< BF << '\n';
for (auto I = BF.getSize(); I < BF.getMaxSize(); ++I) {
errs() << format("%.2x ", (*FunctionData)[I]);
}
errs() << '\n';
<< BF << '\n'
<< FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
<< '\n';
}
return false;

View File

@ -20,6 +20,7 @@
#include "JumpTable.h"
#include "MCPlusBuilder.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@ -46,6 +47,7 @@
#include <shared_mutex>
#include <string>
#include <system_error>
#include <type_traits>
#include <unordered_map>
#include <vector>
@ -1039,6 +1041,18 @@ public:
}
};
/// Print a sequence of single-byte elements to \p OS as two-digit lowercase
/// hexadecimal values separated by single spaces. No trailing separator or
/// newline is emitted, and an empty array prints nothing.
///
/// The overload only participates in overload resolution when the element
/// type \p T occupies exactly one byte.
///
/// \param OS        stream that receives the formatted bytes.
/// \param ByteArray bytes to print.
/// \returns \p OS, to allow chaining.
template <typename T,
          typename = std::enable_if_t<sizeof(T) == 1> >
inline raw_ostream &operator<<(raw_ostream &OS,
                               const ArrayRef<T> &ByteArray) {
  const char *Sep = "";
  for (const auto Byte : ByteArray) {
    // Normalize through unsigned char first: a signed element type (char,
    // int8_t) would otherwise be sign-extended by integer promotion and a
    // value such as 0x80 would be printed as "ffffff80".
    OS << Sep
       << format("%.2x",
                 static_cast<unsigned>(static_cast<unsigned char>(Byte)));
    Sep = " ";
  }
  return OS;
}
} // namespace bolt
} // namespace llvm

View File

@ -81,11 +81,15 @@ AlignMacroOpFusion("align-macro-fusion",
cl::cat(BoltRelocCategory));
cl::opt<bool>
PreserveBlocksAlignment("preserve-blocks-alignment",
cl::desc("try to preserve basic block alignment"),
CheckEncoding("check-encoding",
cl::desc("perform verification of LLVM instruction encoding/decoding. "
"Every instruction in the input is decoded and re-encoded. "
"If the resulting bytes do not match the input, a warning message "
"is printed."),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
cl::Hidden,
cl::cat(BoltCategory));
static cl::opt<bool>
DotToolTipCode("dot-tooltip-code",
@ -114,6 +118,13 @@ JumpTables("jump-tables",
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// Boolean command-line option "-preserve-blocks-alignment", off by default.
// Per its description, it requests that basic block alignment be preserved.
// NOTE(review): declared without `static`, presumably so that other
// translation units can reference it via an extern declaration - confirm.
cl::opt<bool>
PreserveBlocksAlignment("preserve-blocks-alignment",
cl::desc("try to preserve basic block alignment"),
cl::init(false),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
cl::opt<bool>
PrintDynoStats("dyno-stats",
cl::desc("print execution info based on profile"),
@ -1016,6 +1027,23 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
break;
}
// Check integrity of LLVM assembler/disassembler: re-encode the instruction
// that was just decoded and compare the resulting bytes with the bytes it was
// decoded from. Branches, calls and no-ops are excluded from the check.
// NOTE(review): presumably these are skipped because their re-encoded form
// can legitimately differ from the input - confirm.
if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
    !BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
  SmallString<256> Code;
  SmallVector<MCFixup, 4> Fixups;
  raw_svector_ostream VecOS(Code);
  BC.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI);

  // Bytes of this instruction as they appear in the input function.
  const auto InputData = FunctionData.slice(Offset, Size);
  // View of the freshly encoded bytes; Code owns the storage.
  const ArrayRef<uint8_t> EncodedData(
      reinterpret_cast<const uint8_t *>(Code.data()), Code.size());

  if (InputData != EncodedData) {
    errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
           << "function " << *this << ":\n";
    BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
    errs() << " input: " << InputData
           << "\n output: " << EncodedData << "\n\n";
  }
}
// Cannot process functions with AVX-512 instructions.
if (MIB->hasEVEXEncoding(Instruction)) {
if (opts::Verbosity >= 1) {