forked from OSchip/llvm-project
[BOLT] Add option to verify instruction encoder/decoder
Summary: Add option `-check-encoding` to verify if the input to LLVM disassembler matches the output of the assembler. When set, the verification runs on every instruction in processed functions. I'm not enabling the option by default as it could be quite noisy on x86 where instruction encoding is ambiguous and can include redundant prefixes. (cherry picked from FBD16595415)
This commit is contained in:
parent
79ff4ec1cb
commit
8d5854ef09
|
@ -685,11 +685,9 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
|
|||
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
|
||||
<< " starting at offset "
|
||||
<< (Offset - BF.getSize()) << " in function "
|
||||
<< BF << '\n';
|
||||
for (auto I = BF.getSize(); I < BF.getMaxSize(); ++I) {
|
||||
errs() << format("%.2x ", (*FunctionData)[I]);
|
||||
}
|
||||
errs() << '\n';
|
||||
<< BF << '\n'
|
||||
<< FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
|
||||
<< '\n';
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "JumpTable.h"
|
||||
#include "MCPlusBuilder.h"
|
||||
#include "llvm/ADT/iterator.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
|
||||
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
||||
|
@ -46,6 +47,7 @@
|
|||
#include <shared_mutex>
|
||||
#include <string>
|
||||
#include <system_error>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
|
@ -1039,6 +1041,18 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<sizeof(T) == 1> >
|
||||
inline raw_ostream &operator<<(raw_ostream &OS,
|
||||
const ArrayRef<T> &ByteArray) {
|
||||
const char *Sep = "";
|
||||
for (const auto Byte : ByteArray) {
|
||||
OS << Sep << format("%.2x", Byte);
|
||||
Sep = " ";
|
||||
}
|
||||
return OS;
|
||||
}
|
||||
|
||||
} // namespace bolt
|
||||
} // namespace llvm
|
||||
|
||||
|
|
|
@ -81,11 +81,15 @@ AlignMacroOpFusion("align-macro-fusion",
|
|||
cl::cat(BoltRelocCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
PreserveBlocksAlignment("preserve-blocks-alignment",
|
||||
cl::desc("try to preserve basic block alignment"),
|
||||
CheckEncoding("check-encoding",
|
||||
cl::desc("perform verification of LLVM instruction encoding/decoding. "
|
||||
"Every instruction in the input is decoded and re-encoded. "
|
||||
"If the resulting bytes do not match the input, a warning message "
|
||||
"is printed."),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
cl::Hidden,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
DotToolTipCode("dot-tooltip-code",
|
||||
|
@ -114,6 +118,13 @@ JumpTables("jump-tables",
|
|||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
PreserveBlocksAlignment("preserve-blocks-alignment",
|
||||
cl::desc("try to preserve basic block alignment"),
|
||||
cl::init(false),
|
||||
cl::ZeroOrMore,
|
||||
cl::cat(BoltOptCategory));
|
||||
|
||||
cl::opt<bool>
|
||||
PrintDynoStats("dyno-stats",
|
||||
cl::desc("print execution info based on profile"),
|
||||
|
@ -1016,6 +1027,23 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
|
|||
break;
|
||||
}
|
||||
|
||||
// Check integrity of LLVM assembler/disassembler.
|
||||
if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
|
||||
!BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
|
||||
SmallString<256> Code;
|
||||
SmallVector<MCFixup, 4> Fixups;
|
||||
raw_svector_ostream VecOS(Code);
|
||||
BC.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI);
|
||||
auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
|
||||
if (FunctionData.slice(Offset, Size) != EncodedData) {
|
||||
errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
|
||||
<< "function " << *this << ":\n";
|
||||
BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
|
||||
errs() << " input: " << FunctionData.slice(Offset, Size)
|
||||
<< "\n output: " << EncodedData << "\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Cannot process functions with AVX-512 instructions.
|
||||
if (MIB->hasEVEXEncoding(Instruction)) {
|
||||
if (opts::Verbosity >= 1) {
|
||||
|
|
Loading…
Reference in New Issue