forked from OSchip/llvm-project
Table-driven disassembler for the X86 architecture (16-, 32-, and 64-bit
incarnations), integrated into the MC framework. The disassembler is table-driven, using a custom TableGen backend to generate hierarchical tables optimized for fast decode. The disassembler consumes MemoryObjects and produces arrays of MCInsts, adhering to the abstract base class MCDisassembler (llvm/MC/MCDisassembler.h). The disassembler is documented in detail in - lib/Target/X86/Disassembler/X86Disassembler.cpp (disassembler runtime) - utils/TableGen/DisassemblerEmitter.cpp (table emitter) You can test the disassembler by running llvm-mc -disassemble for i386 or x86_64 targets. Please let me know if you encounter any problems with it. llvm-svn: 91749
This commit is contained in:
parent
6fda43f4c1
commit
04cc307edd
|
@ -2,7 +2,7 @@ set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC)
|
|||
set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport)
|
||||
# Dependency list for LLVMAlphaAsmPrinter. Set once, with LLVMAlphaCodeGen
# included; the earlier duplicate assignment without it was dead.
set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
|
||||
set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
|
||||
|
@ -11,12 +11,12 @@ set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem)
|
|||
set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
|
||||
set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
|
||||
# Dependency list for LLVMBlackfinAsmPrinter. Set once, with
# LLVMBlackfinCodeGen included; the earlier duplicate assignment was dead.
set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)
|
||||
set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
|
||||
set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport)
|
||||
# Dependency list for LLVMCellSPUAsmPrinter. Set once, with LLVMCellSPUCodeGen
# included; the earlier duplicate assignment without it was dead.
set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)
|
||||
set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
|
||||
|
@ -31,7 +31,7 @@ set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVM
|
|||
set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
|
||||
set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
|
||||
set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport)
|
||||
# Dependency list for LLVMMSP430AsmPrinter. Set once, with LLVMMSP430CodeGen
# included; the earlier duplicate assignment without it was dead.
set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430CodeGen LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
|
||||
set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget)
|
||||
|
@ -40,17 +40,17 @@ set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport)
|
|||
set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
|
||||
# Dependency list for LLVMPowerPCAsmPrinter. Set once, with LLVMPowerPCCodeGen
# included; the earlier duplicate assignment without it was dead.
set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
|
||||
set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
|
||||
set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
|
||||
# Dependency list for LLVMSparcAsmPrinter. Set once, with LLVMSparcCodeGen
# included; the earlier duplicate assignment without it was dead.
set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)
|
||||
set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)
|
||||
set(MSVC_LIB_DEPS_LLVMSystem )
|
||||
# Dependency list for LLVMSystemZAsmPrinter. Set once, with LLVMSystemZCodeGen
# included; the earlier duplicate assignment without it was dead.
set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
|
||||
set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
|
||||
set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem)
|
||||
|
|
|
@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS X86.td)
|
|||
tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
|
||||
tablegen(X86GenRegisterNames.inc -gen-register-enums)
|
||||
tablegen(X86GenRegisterInfo.inc -gen-register-desc)
|
||||
tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
|
||||
tablegen(X86GenInstrNames.inc -gen-instr-enums)
|
||||
tablegen(X86GenInstrInfo.inc -gen-instr-desc)
|
||||
tablegen(X86GenAsmWriter.inc -gen-asm-writer)
|
||||
|
|
|
@ -2,5 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
|
|||
|
||||
# Define the LLVMX86Disassembler library from the C++ translation layer
# (X86Disassembler.cpp) and the C decoder (X86DisassemblerDecoder.c).
add_llvm_library(LLVMX86Disassembler
|
||||
X86Disassembler.cpp
|
||||
X86DisassemblerDecoder.c
|
||||
)
|
||||
# Order the build after the X86CodeGenTable_gen target (presumably the
# TableGen-generated .inc files -- confirm against the X86 target CMakeLists).
add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
|
||||
|
|
|
@ -6,18 +6,450 @@
|
|||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is part of the X86 Disassembler.
|
||||
// It contains code to translate the data produced by the decoder into
|
||||
// MCInsts.
|
||||
// Documentation for the disassembler can be found in X86Disassembler.h.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86Disassembler.h"
|
||||
#include "X86DisassemblerDecoder.h"
|
||||
#include "X86InstrInfo.h"
|
||||
|
||||
#include "llvm/MC/MCDisassembler.h"
|
||||
#include "llvm/MC/MCDisassembler.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/Target/TargetRegistry.h"
|
||||
#include "X86.h"
|
||||
#include "llvm/Support/MemoryObject.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
using namespace llvm::X86Disassembler;
|
||||
|
||||
namespace llvm {

// Placeholder register numbers.  The automatically generated switch
// statements in this file expand X86::<name> for every effective-address
// base, including the pseudo-bases listed here.  These constants are never
// actually assigned to an operand; they only allow those expansions to
// compile.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI,          // 501
    BP_SI,          // 502
    BP_DI,          // 503
    sib,            // 504
    sib64           // 505
  };
}

}
|
||||
|
||||
static void translateInstruction(MCInst &target,
|
||||
InternalInstruction &source);
|
||||
|
||||
// Remembers the architecture mode; it is forwarded to the decoder on every
// getInstruction() call.
X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode)
  : MCDisassembler(), fMode(mode) {}

// Nothing to do here: the destructor body is intentionally empty.
X86GenericDisassembler::~X86GenericDisassembler() {}
|
||||
|
||||
/// regionReader - a callback function that wraps the readByte method from
|
||||
/// MemoryObject.
|
||||
///
|
||||
/// @param arg - The generic callback parameter. In this case, this should
|
||||
/// be a pointer to a MemoryObject.
|
||||
/// @param byte - A pointer to the byte to be read.
|
||||
/// @param address - The address to be read.
|
||||
static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
|
||||
MemoryObject* region = static_cast<MemoryObject*>(arg);
|
||||
return region->readByte(address, byte);
|
||||
}
|
||||
|
||||
/// logger - a callback function that wraps the operator<< method from
|
||||
/// raw_ostream.
|
||||
///
|
||||
/// @param arg - The generic callback parameter. This should be a pointe
|
||||
/// to a raw_ostream.
|
||||
/// @param log - A string to be logged. logger() adds a newline.
|
||||
static void logger(void* arg, const char* log) {
|
||||
if (!arg)
|
||||
return;
|
||||
|
||||
raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
|
||||
vStream << log << "\n";
|
||||
}
|
||||
|
||||
//
|
||||
// Public interface for the disassembler
|
||||
//
|
||||
|
||||
/// getInstruction - Decodes the instruction at the given address and, on
///   success, translates it into an MCInst.
///
/// @param instr    - The MCInst to populate on success.
/// @param size     - Receives the decoded instruction's length on success, or
///                   the distance the decoder's read cursor advanced past
///                   address on failure.
/// @param region   - The memory to read instruction bytes from.
/// @param address  - The address of the first byte of the instruction.
/// @param vStream  - The stream that receives the decoder's log messages.
/// @return         - true if an instruction was decoded and translated; false
///                   if the decoder reported an error.
bool X86GenericDisassembler::getInstruction(MCInst &instr,
                                            uint64_t &size,
                                            const MemoryObject &region,
                                            uint64_t address,
                                            raw_ostream &vStream) const {
  InternalInstruction internalInstr;

  // The decoder is plain C, so the region and the log stream travel through
  // it as opaque void* arguments and are recovered by regionReader/logger.
  int ret = decodeInstruction(&internalInstr,
                              regionReader,
                              (void*)&region,
                              logger,
                              (void*)&vStream,
                              address,
                              fMode);

  if (ret) {
    // A nonzero result means the decode failed.
    size = internalInstr.readerCursor - address;
    return false;
  }

  size = internalInstr.length;
  translateInstruction(instr, internalInstr);
  return true;
}
|
||||
|
||||
//
|
||||
// Private code that translates from struct InternalInstructions to MCInsts.
|
||||
//
|
||||
|
||||
/// translateRegister - Translates an internal register to the appropriate LLVM
|
||||
/// register, and appends it as an operand to an MCInst.
|
||||
///
|
||||
/// @param mcInst - The MCInst to append to.
|
||||
/// @param reg - The Reg to append.
|
||||
static void translateRegister(MCInst &mcInst, Reg reg) {
|
||||
#define ENTRY(x) X86::x,
|
||||
uint8_t llvmRegnums[] = {
|
||||
ALL_REGS
|
||||
0
|
||||
};
|
||||
#undef ENTRY
|
||||
|
||||
uint8_t llvmRegnum = llvmRegnums[reg];
|
||||
mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
|
||||
}
|
||||
|
||||
/// translateImmediate - Appends an immediate operand to an MCInst.
|
||||
///
|
||||
/// @param mcInst - The MCInst to append to.
|
||||
/// @param immediate - The immediate value to append.
|
||||
static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
|
||||
mcInst.addOperand(MCOperand::CreateImm(immediate));
|
||||
}
|
||||
|
||||
/// translateRMRegister - Translates a register stored in the R/M field of the
|
||||
/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
|
||||
/// @param mcInst - The MCInst to append to.
|
||||
/// @param insn - The internal instruction to extract the R/M field
|
||||
/// from.
|
||||
static void translateRMRegister(MCInst &mcInst,
|
||||
InternalInstruction &insn) {
|
||||
assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 &&
|
||||
"A R/M register operand may not have a SIB byte");
|
||||
|
||||
switch (insn.eaBase) {
|
||||
case EA_BASE_NONE:
|
||||
llvm_unreachable("EA_BASE_NONE for ModR/M base");
|
||||
break;
|
||||
#define ENTRY(x) case EA_BASE_##x:
|
||||
ALL_EA_BASES
|
||||
#undef ENTRY
|
||||
llvm_unreachable("A R/M register operand may not have a base; "
|
||||
"the operand must be a register.");
|
||||
break;
|
||||
#define ENTRY(x) \
|
||||
case EA_REG_##x: \
|
||||
mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
|
||||
ALL_REGS
|
||||
#undef ENTRY
|
||||
default:
|
||||
llvm_unreachable("Unexpected EA base register");
|
||||
}
|
||||
}
|
||||
|
||||
/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
|
||||
/// fields of an internal instruction (and possibly its SIB byte) to a memory
|
||||
/// operand in LLVM's format, and appends it to an MCInst.
|
||||
///
|
||||
/// @param mcInst - The MCInst to append to.
|
||||
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
|
||||
/// from.
|
||||
static void translateRMMemory(MCInst &mcInst,
|
||||
InternalInstruction &insn) {
|
||||
// Addresses in an MCInst are represented as five operands:
|
||||
// 1. basereg (register) The R/M base, or (if there is a SIB) the
|
||||
// SIB base
|
||||
// 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
|
||||
// scale amount
|
||||
// 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
|
||||
// the index (which is multiplied by the
|
||||
// scale amount)
|
||||
// 4. displacement (immediate) 0, or the displacement if there is one
|
||||
// 5. segmentreg (register) x86_registerNONE for now, but could be set
|
||||
// if we have segment overrides
|
||||
|
||||
MCOperand baseReg;
|
||||
MCOperand scaleAmount;
|
||||
MCOperand indexReg;
|
||||
MCOperand displacement;
|
||||
MCOperand segmentReg;
|
||||
|
||||
if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
|
||||
if (insn.sibBase != SIB_BASE_NONE) {
|
||||
switch (insn.sibBase) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected sibBase");
|
||||
#define ENTRY(x) \
|
||||
case SIB_BASE_##x: \
|
||||
baseReg = MCOperand::CreateReg(X86::x); break;
|
||||
ALL_SIB_BASES
|
||||
#undef ENTRY
|
||||
}
|
||||
} else {
|
||||
baseReg = MCOperand::CreateReg(0);
|
||||
}
|
||||
|
||||
if (insn.sibIndex != SIB_INDEX_NONE) {
|
||||
switch (insn.sibIndex) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected sibIndex");
|
||||
#define ENTRY(x) \
|
||||
case SIB_INDEX_##x: \
|
||||
indexReg = MCOperand::CreateReg(X86::x); break;
|
||||
EA_BASES_32BIT
|
||||
EA_BASES_64BIT
|
||||
#undef ENTRY
|
||||
}
|
||||
} else {
|
||||
indexReg = MCOperand::CreateReg(0);
|
||||
}
|
||||
|
||||
scaleAmount = MCOperand::CreateImm(insn.sibScale);
|
||||
} else {
|
||||
switch (insn.eaBase) {
|
||||
case EA_BASE_NONE:
|
||||
assert(insn.eaDisplacement != EA_DISP_NONE &&
|
||||
"EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
|
||||
|
||||
if (insn.mode == MODE_64BIT)
|
||||
baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
|
||||
else
|
||||
baseReg = MCOperand::CreateReg(0);
|
||||
|
||||
indexReg = MCOperand::CreateReg(0);
|
||||
break;
|
||||
case EA_BASE_BX_SI:
|
||||
baseReg = MCOperand::CreateReg(X86::BX);
|
||||
indexReg = MCOperand::CreateReg(X86::SI);
|
||||
break;
|
||||
case EA_BASE_BX_DI:
|
||||
baseReg = MCOperand::CreateReg(X86::BX);
|
||||
indexReg = MCOperand::CreateReg(X86::DI);
|
||||
break;
|
||||
case EA_BASE_BP_SI:
|
||||
baseReg = MCOperand::CreateReg(X86::BP);
|
||||
indexReg = MCOperand::CreateReg(X86::SI);
|
||||
break;
|
||||
case EA_BASE_BP_DI:
|
||||
baseReg = MCOperand::CreateReg(X86::BP);
|
||||
indexReg = MCOperand::CreateReg(X86::DI);
|
||||
break;
|
||||
default:
|
||||
indexReg = MCOperand::CreateReg(0);
|
||||
switch (insn.eaBase) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected eaBase");
|
||||
break;
|
||||
// Here, we will use the fill-ins defined above. However,
|
||||
// BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
|
||||
// sib and sib64 were handled in the top-level if, so they're only
|
||||
// placeholders to keep the compiler happy.
|
||||
#define ENTRY(x) \
|
||||
case EA_BASE_##x: \
|
||||
baseReg = MCOperand::CreateReg(X86::x); break;
|
||||
ALL_EA_BASES
|
||||
#undef ENTRY
|
||||
#define ENTRY(x) case EA_REG_##x:
|
||||
ALL_REGS
|
||||
#undef ENTRY
|
||||
llvm_unreachable("A R/M memory operand may not be a register; "
|
||||
"the base field must be a base.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
displacement = MCOperand::CreateImm(insn.displacement);
|
||||
|
||||
static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
|
||||
0, // SEG_OVERRIDE_NONE
|
||||
X86::CS,
|
||||
X86::SS,
|
||||
X86::DS,
|
||||
X86::ES,
|
||||
X86::FS,
|
||||
X86::GS
|
||||
};
|
||||
|
||||
segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
|
||||
|
||||
mcInst.addOperand(baseReg);
|
||||
mcInst.addOperand(scaleAmount);
|
||||
mcInst.addOperand(indexReg);
|
||||
mcInst.addOperand(displacement);
|
||||
mcInst.addOperand(segmentReg);
|
||||
}
|
||||
|
||||
/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
|
||||
/// byte of an instruction to LLVM form, and appends it to an MCInst.
|
||||
///
|
||||
/// @param mcInst - The MCInst to append to.
|
||||
/// @param operand - The operand, as stored in the descriptor table.
|
||||
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
|
||||
/// from.
|
||||
static void translateRM(MCInst &mcInst,
|
||||
OperandSpecifier &operand,
|
||||
InternalInstruction &insn) {
|
||||
switch (operand.type) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected type for a R/M operand");
|
||||
case TYPE_R8:
|
||||
case TYPE_R16:
|
||||
case TYPE_R32:
|
||||
case TYPE_R64:
|
||||
case TYPE_Rv:
|
||||
case TYPE_MM:
|
||||
case TYPE_MM32:
|
||||
case TYPE_MM64:
|
||||
case TYPE_XMM:
|
||||
case TYPE_XMM32:
|
||||
case TYPE_XMM64:
|
||||
case TYPE_XMM128:
|
||||
case TYPE_DEBUGREG:
|
||||
case TYPE_CR32:
|
||||
case TYPE_CR64:
|
||||
translateRMRegister(mcInst, insn);
|
||||
break;
|
||||
case TYPE_M:
|
||||
case TYPE_M8:
|
||||
case TYPE_M16:
|
||||
case TYPE_M32:
|
||||
case TYPE_M64:
|
||||
case TYPE_M128:
|
||||
case TYPE_M512:
|
||||
case TYPE_Mv:
|
||||
case TYPE_M32FP:
|
||||
case TYPE_M64FP:
|
||||
case TYPE_M80FP:
|
||||
case TYPE_M16INT:
|
||||
case TYPE_M32INT:
|
||||
case TYPE_M64INT:
|
||||
case TYPE_M1616:
|
||||
case TYPE_M1632:
|
||||
case TYPE_M1664:
|
||||
translateRMMemory(mcInst, insn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// translateFPRegister - Translates a stack position on the FPU stack to its
|
||||
/// LLVM form, and appends it to an MCInst.
|
||||
///
|
||||
/// @param mcInst - The MCInst to append to.
|
||||
/// @param stackPos - The stack position to translate.
|
||||
static void translateFPRegister(MCInst &mcInst,
|
||||
uint8_t stackPos) {
|
||||
assert(stackPos < 8 && "Invalid FP stack position");
|
||||
|
||||
mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
|
||||
}
|
||||
|
||||
/// translateOperand - Translates an operand stored in an internal instruction
|
||||
/// to LLVM's format and appends it to an MCInst.
|
||||
///
|
||||
/// @param mcInst - The MCInst to append to.
|
||||
/// @param operand - The operand, as stored in the descriptor table.
|
||||
/// @param insn - The internal instruction.
|
||||
static void translateOperand(MCInst &mcInst,
|
||||
OperandSpecifier &operand,
|
||||
InternalInstruction &insn) {
|
||||
switch (operand.encoding) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled operand encoding during translation");
|
||||
case ENCODING_REG:
|
||||
translateRegister(mcInst, insn.reg);
|
||||
break;
|
||||
case ENCODING_RM:
|
||||
translateRM(mcInst, operand, insn);
|
||||
break;
|
||||
case ENCODING_CB:
|
||||
case ENCODING_CW:
|
||||
case ENCODING_CD:
|
||||
case ENCODING_CP:
|
||||
case ENCODING_CO:
|
||||
case ENCODING_CT:
|
||||
llvm_unreachable("Translation of code offsets isn't supported.");
|
||||
case ENCODING_IB:
|
||||
case ENCODING_IW:
|
||||
case ENCODING_ID:
|
||||
case ENCODING_IO:
|
||||
case ENCODING_Iv:
|
||||
case ENCODING_Ia:
|
||||
translateImmediate(mcInst,
|
||||
insn.immediates[insn.numImmediatesTranslated++]);
|
||||
break;
|
||||
case ENCODING_RB:
|
||||
case ENCODING_RW:
|
||||
case ENCODING_RD:
|
||||
case ENCODING_RO:
|
||||
translateRegister(mcInst, insn.opcodeRegister);
|
||||
break;
|
||||
case ENCODING_I:
|
||||
translateFPRegister(mcInst, insn.opcodeModifier);
|
||||
break;
|
||||
case ENCODING_Rv:
|
||||
translateRegister(mcInst, insn.opcodeRegister);
|
||||
break;
|
||||
case ENCODING_DUP:
|
||||
translateOperand(mcInst,
|
||||
insn.spec->operands[operand.type - TYPE_DUP0],
|
||||
insn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// translateInstruction - Translates an internal instruction and all its
|
||||
/// operands to an MCInst.
|
||||
///
|
||||
/// @param mcInst - The MCInst to populate with the instruction's data.
|
||||
/// @param insn - The internal instruction.
|
||||
static void translateInstruction(MCInst &mcInst,
|
||||
InternalInstruction &insn) {
|
||||
assert(insn.spec);
|
||||
|
||||
mcInst.setOpcode(insn.instructionID);
|
||||
|
||||
int index;
|
||||
|
||||
insn.numImmediatesTranslated = 0;
|
||||
|
||||
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
|
||||
if (insn.spec->operands[index].encoding != ENCODING_NONE)
|
||||
translateOperand(mcInst, insn.spec->operands[index], insn);
|
||||
}
|
||||
}
|
||||
|
||||
/// createX86_32Disassembler - Factory for the 32-bit X86 disassembler.
///
/// @param T  - The target being queried; not used.
/// @return   - A newly allocated X86_32Disassembler.
static const MCDisassembler *createX86_32Disassembler(const Target &T) {
  // The stale "return 0;" stub that preceded this line made it unreachable.
  return new X86Disassembler::X86_32Disassembler;
}
|
||||
|
||||
/// createX86_64Disassembler - Factory for the 64-bit X86 disassembler.
///
/// @param T  - The target being queried; not used.
/// @return   - A newly allocated X86_64Disassembler.
static const MCDisassembler *createX86_64Disassembler(const Target &T) {
  // The stale "return 0;" stub that preceded this line made it unreachable.
  return new X86Disassembler::X86_64Disassembler;
}
|
||||
|
||||
extern "C" void LLVMInitializeX86Disassembler() {
|
||||
|
|
|
@ -0,0 +1,150 @@
|
|||
//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
|
||||
// 64-bit X86 instruction sets. The main decode sequence for an assembly
|
||||
// instruction in this disassembler is:
|
||||
//
|
||||
// 1. Read the prefix bytes and determine the attributes of the instruction.
|
||||
// These attributes, recorded in enum attributeBits
|
||||
// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
|
||||
// provides a mapping from bitmasks to contexts, which are represented by
|
||||
// enum InstructionContext (ibid.).
|
||||
//
|
||||
// 2. Read the opcode, and determine what kind of opcode it is. The
|
||||
// disassembler distinguishes four kinds of opcodes, which are enumerated in
|
||||
// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
|
||||
// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
|
||||
// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
|
||||
//
|
||||
// 3. Depending on the opcode type, look in one of four ClassDecision structures
|
||||
// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
|
||||
// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
|
||||
// a ModRMDecision (ibid.).
|
||||
//
|
||||
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
|
||||
// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
|
||||
// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
|
||||
// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
|
||||
// ModR/M byte is required and how to interpret it.
|
||||
//
|
||||
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
|
||||
// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
|
||||
// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
|
||||
// meanings of its operands.
|
||||
//
|
||||
// 6. For each operand, its encoding is an entry from OperandEncoding
|
||||
// (X86DisassemblerDecoderCommon.h) and its type is an entry from
|
||||
// OperandType (ibid.). The encoding indicates how to read it from the
|
||||
// instruction; the type indicates how to interpret the value once it has
|
||||
// been read. For example, a register operand could be stored in the R/M
|
||||
// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
|
||||
// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
|
||||
// register, for instance). Given this information, the operands can be
|
||||
// extracted and interpreted.
|
||||
//
|
||||
// 7. As the last step, the disassembler translates the instruction information
|
||||
// and operands into a format understandable by the client - in this case, an
|
||||
// MCInst for use by the MC infrastructure.
|
||||
//
|
||||
// The disassembler is broken broadly into two parts: the table emitter that
|
||||
// emits the instruction decode tables discussed above during compilation, and
|
||||
// the disassembler itself. The table emitter is documented in more detail in
|
||||
// utils/TableGen/X86DisassemblerEmitter.h.
|
||||
//
|
||||
// X86Disassembler.h contains the public interface for the disassembler,
|
||||
// adhering to the MCDisassembler interface.
|
||||
// X86Disassembler.cpp contains the code responsible for step 7, and for
|
||||
// invoking the decoder to execute steps 1-6.
|
||||
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
|
||||
// table emitter and the disassembler.
|
||||
// X86DisassemblerDecoder.h contains the public interface of the decoder,
|
||||
// factored out into C for possible use by other projects.
|
||||
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
|
||||
// responsible for steps 1-6.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef X86DISASSEMBLER_H
|
||||
#define X86DISASSEMBLER_H
|
||||
|
||||
#define INSTRUCTION_SPECIFIER_FIELDS \
|
||||
const char* name;
|
||||
|
||||
#define INSTRUCTION_IDS \
|
||||
InstrUID* instructionIDs;
|
||||
|
||||
#include "X86DisassemblerDecoderCommon.h"
|
||||
|
||||
#undef INSTRUCTION_SPECIFIER_FIELDS
|
||||
#undef INSTRUCTION_IDS
|
||||
|
||||
#include "llvm/MC/MCDisassembler.h"
|
||||
|
||||
struct InternalInstruction;
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MCInst;
|
||||
class MemoryObject;
|
||||
class raw_ostream;
|
||||
|
||||
namespace X86Disassembler {
|
||||
|
||||
/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
|
||||
/// All each platform class should have to do is subclass the constructor, and
|
||||
/// provide a different disassemblerMode value.
|
||||
class X86GenericDisassembler : public MCDisassembler {
|
||||
protected:
|
||||
/// Constructor - Initializes the disassembler.
|
||||
///
|
||||
/// @param mode - The X86 architecture mode to decode for.
|
||||
X86GenericDisassembler(DisassemblerMode mode);
|
||||
public:
|
||||
~X86GenericDisassembler();
|
||||
|
||||
/// getInstruction - See MCDisassembler.
|
||||
bool getInstruction(MCInst &instr,
|
||||
uint64_t &size,
|
||||
const MemoryObject ®ion,
|
||||
uint64_t address,
|
||||
raw_ostream &vStream) const;
|
||||
private:
|
||||
DisassemblerMode fMode;
|
||||
};
|
||||
|
||||
/// X86_16Disassembler - 16-bit X86 disassembler.
|
||||
class X86_16Disassembler : public X86GenericDisassembler {
|
||||
public:
|
||||
X86_16Disassembler() :
|
||||
X86GenericDisassembler(MODE_16BIT) {
|
||||
}
|
||||
};
|
||||
|
||||
/// X86_16Disassembler - 32-bit X86 disassembler.
|
||||
class X86_32Disassembler : public X86GenericDisassembler {
|
||||
public:
|
||||
X86_32Disassembler() :
|
||||
X86GenericDisassembler(MODE_32BIT) {
|
||||
}
|
||||
};
|
||||
|
||||
/// X86_16Disassembler - 64-bit X86 disassembler.
|
||||
class X86_64Disassembler : public X86GenericDisassembler {
|
||||
public:
|
||||
X86_64Disassembler() :
|
||||
X86GenericDisassembler(MODE_64BIT) {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace X86Disassembler
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,515 @@
|
|||
/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==*
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is distributed under the University of Illinois Open Source
|
||||
* License. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*
|
||||
*
|
||||
* This file is part of the X86 Disassembler.
|
||||
* It contains the public interface of the instruction decoder.
|
||||
* Documentation for the disassembler can be found in X86Disassembler.h.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
#ifndef X86DISASSEMBLERDECODER_H
|
||||
#define X86DISASSEMBLERDECODER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define INSTRUCTION_SPECIFIER_FIELDS \
|
||||
const char* name;
|
||||
|
||||
#define INSTRUCTION_IDS \
|
||||
InstrUID* instructionIDs;
|
||||
|
||||
#include "X86DisassemblerDecoderCommon.h"
|
||||
|
||||
#undef INSTRUCTION_SPECIFIER_FIELDS
|
||||
#undef INSTRUCTION_IDS
|
||||
|
||||
/*
|
||||
* Accessor functions for various fields of an Intel instruction
|
||||
*/
|
||||
/* ModR/M byte layout: mod = bits 7-6, reg = bits 5-3, r/m = bits 2-0. */
static inline uint8_t modFromModRM(uint8_t modRM)
{
  return (modRM >> 6) & 0x3;
}

static inline uint8_t regFromModRM(uint8_t modRM)
{
  return (modRM >> 3) & 0x7;
}

static inline uint8_t rmFromModRM(uint8_t modRM)
{
  return modRM & 0x07;
}
|
||||
/* SIB byte layout: scale = bits 7-6, index = bits 5-3, base = bits 2-0. */
static inline uint8_t scaleFromSIB(uint8_t sib)
{
  return (sib >> 6) & 0x3;
}

static inline uint8_t indexFromSIB(uint8_t sib)
{
  return (sib >> 3) & 0x7;
}

static inline uint8_t baseFromSIB(uint8_t sib)
{
  return sib & 0x07;
}
|
||||
/* REX prefix flag bits: W = bit 3, R = bit 2, X = bit 1, B = bit 0. */
static inline uint8_t wFromREX(uint8_t rex)
{
  return (rex >> 3) & 0x1;
}

static inline uint8_t rFromREX(uint8_t rex)
{
  return (rex >> 2) & 0x1;
}

static inline uint8_t xFromREX(uint8_t rex)
{
  return (rex >> 1) & 0x1;
}

static inline uint8_t bFromREX(uint8_t rex)
{
  return rex & 0x01;
}
|
||||
|
||||
/*
|
||||
* These enums represent Intel registers for use by the decoder.
|
||||
*/
|
||||
|
||||
/* 8-bit registers.  Users define ENTRY(x) before expanding this list; the
 * entries expand in exactly the order written here. */
#define REGS_8BIT     \
  ENTRY(AL)           \
  ENTRY(CL)           \
  ENTRY(DL)           \
  ENTRY(BL)           \
  ENTRY(AH)           \
  ENTRY(CH)           \
  ENTRY(DH)           \
  ENTRY(BH)           \
  ENTRY(R8B)          \
  ENTRY(R9B)          \
  ENTRY(R10B)         \
  ENTRY(R11B)         \
  ENTRY(R12B)         \
  ENTRY(R13B)         \
  ENTRY(R14B)         \
  ENTRY(R15B)         \
  ENTRY(SPL)          \
  ENTRY(BPL)          \
  ENTRY(SIL)          \
  ENTRY(DIL)
|
||||
|
||||
#define EA_BASES_16BIT \
|
||||
ENTRY(BX_SI) \
|
||||
ENTRY(BX_DI) \
|
||||
ENTRY(BP_SI) \
|
||||
ENTRY(BP_DI) \
|
||||
ENTRY(SI) \
|
||||
ENTRY(DI) \
|
||||
ENTRY(BP) \
|
||||
ENTRY(BX) \
|
||||
ENTRY(R8W) \
|
||||
ENTRY(R9W) \
|
||||
ENTRY(R10W) \
|
||||
ENTRY(R11W) \
|
||||
ENTRY(R12W) \
|
||||
ENTRY(R13W) \
|
||||
ENTRY(R14W) \
|
||||
ENTRY(R15W)
|
||||
|
||||
#define REGS_16BIT \
|
||||
ENTRY(AX) \
|
||||
ENTRY(CX) \
|
||||
ENTRY(DX) \
|
||||
ENTRY(BX) \
|
||||
ENTRY(SP) \
|
||||
ENTRY(BP) \
|
||||
ENTRY(SI) \
|
||||
ENTRY(DI) \
|
||||
ENTRY(R8W) \
|
||||
ENTRY(R9W) \
|
||||
ENTRY(R10W) \
|
||||
ENTRY(R11W) \
|
||||
ENTRY(R12W) \
|
||||
ENTRY(R13W) \
|
||||
ENTRY(R14W) \
|
||||
ENTRY(R15W)
|
||||
|
||||
#define EA_BASES_32BIT \
|
||||
ENTRY(EAX) \
|
||||
ENTRY(ECX) \
|
||||
ENTRY(EDX) \
|
||||
ENTRY(EBX) \
|
||||
ENTRY(sib) \
|
||||
ENTRY(EBP) \
|
||||
ENTRY(ESI) \
|
||||
ENTRY(EDI) \
|
||||
ENTRY(R8D) \
|
||||
ENTRY(R9D) \
|
||||
ENTRY(R10D) \
|
||||
ENTRY(R11D) \
|
||||
ENTRY(R12D) \
|
||||
ENTRY(R13D) \
|
||||
ENTRY(R14D) \
|
||||
ENTRY(R15D)
|
||||
|
||||
#define REGS_32BIT \
|
||||
ENTRY(EAX) \
|
||||
ENTRY(ECX) \
|
||||
ENTRY(EDX) \
|
||||
ENTRY(EBX) \
|
||||
ENTRY(ESP) \
|
||||
ENTRY(EBP) \
|
||||
ENTRY(ESI) \
|
||||
ENTRY(EDI) \
|
||||
ENTRY(R8D) \
|
||||
ENTRY(R9D) \
|
||||
ENTRY(R10D) \
|
||||
ENTRY(R11D) \
|
||||
ENTRY(R12D) \
|
||||
ENTRY(R13D) \
|
||||
ENTRY(R14D) \
|
||||
ENTRY(R15D)
|
||||
|
||||
#define EA_BASES_64BIT \
|
||||
ENTRY(RAX) \
|
||||
ENTRY(RCX) \
|
||||
ENTRY(RDX) \
|
||||
ENTRY(RBX) \
|
||||
ENTRY(sib64) \
|
||||
ENTRY(RBP) \
|
||||
ENTRY(RSI) \
|
||||
ENTRY(RDI) \
|
||||
ENTRY(R8) \
|
||||
ENTRY(R9) \
|
||||
ENTRY(R10) \
|
||||
ENTRY(R11) \
|
||||
ENTRY(R12) \
|
||||
ENTRY(R13) \
|
||||
ENTRY(R14) \
|
||||
ENTRY(R15)
|
||||
|
||||
#define REGS_64BIT \
|
||||
ENTRY(RAX) \
|
||||
ENTRY(RCX) \
|
||||
ENTRY(RDX) \
|
||||
ENTRY(RBX) \
|
||||
ENTRY(RSP) \
|
||||
ENTRY(RBP) \
|
||||
ENTRY(RSI) \
|
||||
ENTRY(RDI) \
|
||||
ENTRY(R8) \
|
||||
ENTRY(R9) \
|
||||
ENTRY(R10) \
|
||||
ENTRY(R11) \
|
||||
ENTRY(R12) \
|
||||
ENTRY(R13) \
|
||||
ENTRY(R14) \
|
||||
ENTRY(R15)
|
||||
|
||||
#define REGS_MMX \
|
||||
ENTRY(MM0) \
|
||||
ENTRY(MM1) \
|
||||
ENTRY(MM2) \
|
||||
ENTRY(MM3) \
|
||||
ENTRY(MM4) \
|
||||
ENTRY(MM5) \
|
||||
ENTRY(MM6) \
|
||||
ENTRY(MM7)
|
||||
|
||||
#define REGS_XMM \
|
||||
ENTRY(XMM0) \
|
||||
ENTRY(XMM1) \
|
||||
ENTRY(XMM2) \
|
||||
ENTRY(XMM3) \
|
||||
ENTRY(XMM4) \
|
||||
ENTRY(XMM5) \
|
||||
ENTRY(XMM6) \
|
||||
ENTRY(XMM7) \
|
||||
ENTRY(XMM8) \
|
||||
ENTRY(XMM9) \
|
||||
ENTRY(XMM10) \
|
||||
ENTRY(XMM11) \
|
||||
ENTRY(XMM12) \
|
||||
ENTRY(XMM13) \
|
||||
ENTRY(XMM14) \
|
||||
ENTRY(XMM15)
|
||||
|
||||
#define REGS_SEGMENT \
|
||||
ENTRY(ES) \
|
||||
ENTRY(CS) \
|
||||
ENTRY(SS) \
|
||||
ENTRY(DS) \
|
||||
ENTRY(FS) \
|
||||
ENTRY(GS)
|
||||
|
||||
#define REGS_DEBUG \
|
||||
ENTRY(DR0) \
|
||||
ENTRY(DR1) \
|
||||
ENTRY(DR2) \
|
||||
ENTRY(DR3) \
|
||||
ENTRY(DR4) \
|
||||
ENTRY(DR5) \
|
||||
ENTRY(DR6) \
|
||||
ENTRY(DR7)
|
||||
|
||||
#define REGS_CONTROL_32BIT \
|
||||
ENTRY(ECR0) \
|
||||
ENTRY(ECR1) \
|
||||
ENTRY(ECR2) \
|
||||
ENTRY(ECR3) \
|
||||
ENTRY(ECR4) \
|
||||
ENTRY(ECR5) \
|
||||
ENTRY(ECR6) \
|
||||
ENTRY(ECR7)
|
||||
|
||||
#define REGS_CONTROL_64BIT \
|
||||
ENTRY(RCR0) \
|
||||
ENTRY(RCR1) \
|
||||
ENTRY(RCR2) \
|
||||
ENTRY(RCR3) \
|
||||
ENTRY(RCR4) \
|
||||
ENTRY(RCR5) \
|
||||
ENTRY(RCR6) \
|
||||
ENTRY(RCR7) \
|
||||
ENTRY(RCR8)
|
||||
|
||||
#define ALL_EA_BASES \
|
||||
EA_BASES_16BIT \
|
||||
EA_BASES_32BIT \
|
||||
EA_BASES_64BIT
|
||||
|
||||
#define ALL_SIB_BASES \
|
||||
REGS_32BIT \
|
||||
REGS_64BIT
|
||||
|
||||
#define ALL_REGS \
|
||||
REGS_8BIT \
|
||||
REGS_16BIT \
|
||||
REGS_32BIT \
|
||||
REGS_64BIT \
|
||||
REGS_MMX \
|
||||
REGS_XMM \
|
||||
REGS_SEGMENT \
|
||||
REGS_DEBUG \
|
||||
REGS_CONTROL_32BIT \
|
||||
REGS_CONTROL_64BIT \
|
||||
ENTRY(RIP)
|
||||
|
||||
/*
|
||||
* EABase - All possible values of the base field for effective-address
|
||||
* computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
|
||||
* distinguish between bases (EA_BASE_*) and registers that just happen to be
|
||||
* referred to when Mod == 0b11 (EA_REG_*).
|
||||
*/
|
||||
typedef enum {
|
||||
EA_BASE_NONE,
|
||||
#define ENTRY(x) EA_BASE_##x,
|
||||
ALL_EA_BASES
|
||||
#undef ENTRY
|
||||
#define ENTRY(x) EA_REG_##x,
|
||||
ALL_REGS
|
||||
#undef ENTRY
|
||||
EA_max
|
||||
} EABase;
|
||||
|
||||
/*
|
||||
* SIBIndex - All possible values of the SIB index field.
|
||||
* Borrows entries from ALL_EA_BASES with the special case that
|
||||
* sib is synonymous with NONE.
|
||||
*/
|
||||
typedef enum {
|
||||
SIB_INDEX_NONE,
|
||||
#define ENTRY(x) SIB_INDEX_##x,
|
||||
ALL_EA_BASES
|
||||
#undef ENTRY
|
||||
SIB_INDEX_max
|
||||
} SIBIndex;
|
||||
|
||||
/*
|
||||
* SIBBase - All possible values of the SIB base field.
|
||||
*/
|
||||
typedef enum {
|
||||
SIB_BASE_NONE,
|
||||
#define ENTRY(x) SIB_BASE_##x,
|
||||
ALL_SIB_BASES
|
||||
#undef ENTRY
|
||||
SIB_BASE_max
|
||||
} SIBBase;
|
||||
|
||||
/*
|
||||
* EADisplacement - Possible displacement types for effective-address
|
||||
* computations.
|
||||
*/
|
||||
typedef enum {
|
||||
EA_DISP_NONE,
|
||||
EA_DISP_8,
|
||||
EA_DISP_16,
|
||||
EA_DISP_32
|
||||
} EADisplacement;
|
||||
|
||||
/*
|
||||
* Reg - All possible values of the reg field in the ModR/M byte.
|
||||
*/
|
||||
typedef enum {
|
||||
#define ENTRY(x) REG_##x,
|
||||
ALL_REGS
|
||||
#undef ENTRY
|
||||
REG_max
|
||||
} Reg;
|
||||
|
||||
/*
|
||||
* SegmentOverride - All possible segment overrides.
|
||||
*/
|
||||
typedef enum {
|
||||
SEG_OVERRIDE_NONE,
|
||||
SEG_OVERRIDE_CS,
|
||||
SEG_OVERRIDE_SS,
|
||||
SEG_OVERRIDE_DS,
|
||||
SEG_OVERRIDE_ES,
|
||||
SEG_OVERRIDE_FS,
|
||||
SEG_OVERRIDE_GS,
|
||||
SEG_OVERRIDE_max
|
||||
} SegmentOverride;
|
||||
|
||||
typedef uint8_t BOOL;
|
||||
|
||||
/*
|
||||
* byteReader_t - Type for the byte reader that the consumer must provide to
|
||||
* the decoder. Reads a single byte from the instruction's address space.
|
||||
* @param arg - A baton that the consumer can associate with any internal
|
||||
* state that it needs.
|
||||
* @param byte - A pointer to a single byte in memory that should be set to
|
||||
* contain the value at address.
|
||||
* @param address - The address in the instruction's address space that should
|
||||
* be read from.
|
||||
* @return - -1 if the byte cannot be read for any reason; 0 otherwise.
|
||||
*/
|
||||
typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
|
||||
|
||||
/*
|
||||
* dlog_t - Type for the logging function that the consumer can provide to
|
||||
* get debugging output from the decoder.
|
||||
* @param arg - A baton that the consumer can associate with any internal
|
||||
* state that it needs.
|
||||
* @param log - A string that contains the message. Will be reused after
|
||||
* the logger returns.
|
||||
*/
|
||||
typedef void (*dlog_t)(void* arg, const char *log);
|
||||
|
||||
/*
|
||||
* The x86 internal instruction, which is produced by the decoder.
|
||||
*/
|
||||
struct InternalInstruction {
|
||||
/* Reader interface (C) */
|
||||
byteReader_t reader;
|
||||
/* Opaque value passed to the reader */
|
||||
void* readerArg;
|
||||
/* The address of the next byte to read via the reader */
|
||||
uint64_t readerCursor;
|
||||
|
||||
/* Logger interface (C) */
|
||||
dlog_t dlog;
|
||||
/* Opaque value passed to the logger */
|
||||
void* dlogArg;
|
||||
|
||||
/* General instruction information */
|
||||
|
||||
/* The mode to disassemble for (64-bit, protected, real) */
|
||||
DisassemblerMode mode;
|
||||
/* The start of the instruction, usable with the reader */
|
||||
uint64_t startLocation;
|
||||
/* The length of the instruction, in bytes */
|
||||
size_t length;
|
||||
|
||||
/* Prefix state */
|
||||
|
||||
/* 1 if the prefix byte corresponding to the entry is present; 0 if not */
|
||||
uint8_t prefixPresent[0x100];
|
||||
/* contains the location (for use with the reader) of the prefix byte */
|
||||
uint64_t prefixLocations[0x100];
|
||||
/* The value of the REX prefix, if present */
|
||||
uint8_t rexPrefix;
|
||||
/* The location of the REX prefix */
|
||||
uint64_t rexLocation;
|
||||
/* The location where a mandatory prefix would have to be (i.e., right before
|
||||
the opcode, or right before the REX prefix if one is present) */
|
||||
uint64_t necessaryPrefixLocation;
|
||||
/* The segment override type */
|
||||
SegmentOverride segmentOverride;
|
||||
|
||||
/* Sizes of various critical pieces of data */
|
||||
uint8_t registerSize;
|
||||
uint8_t addressSize;
|
||||
uint8_t displacementSize;
|
||||
uint8_t immediateSize;
|
||||
|
||||
/* opcode state */
|
||||
|
||||
/* The value of the two-byte escape prefix (usually 0x0f) */
|
||||
uint8_t twoByteEscape;
|
||||
/* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
|
||||
uint8_t threeByteEscape;
|
||||
/* The last byte of the opcode, not counting any ModR/M extension */
|
||||
uint8_t opcode;
|
||||
/* The ModR/M byte of the instruction, if it is an opcode extension */
|
||||
uint8_t modRMExtension;
|
||||
|
||||
/* decode state */
|
||||
|
||||
/* The type of opcode, used for indexing into the array of decode tables */
|
||||
OpcodeType opcodeType;
|
||||
/* The instruction ID, extracted from the decode table */
|
||||
uint16_t instructionID;
|
||||
/* The specifier for the instruction, from the instruction info table */
|
||||
struct InstructionSpecifier* spec;
|
||||
|
||||
/* state for additional bytes, consumed during operand decode. Pattern:
|
||||
consumed___ indicates that the byte was already consumed and does not
|
||||
need to be consumed again */
|
||||
|
||||
/* The ModR/M byte, which contains most register operands and some portion of
|
||||
all memory operands */
|
||||
BOOL consumedModRM;
|
||||
uint8_t modRM;
|
||||
|
||||
/* The SIB byte, used for more complex 32- or 64-bit memory operands */
|
||||
BOOL consumedSIB;
|
||||
uint8_t sib;
|
||||
|
||||
/* The displacement, used for memory operands */
|
||||
BOOL consumedDisplacement;
|
||||
int32_t displacement;
|
||||
|
||||
/* Immediates. There can be two in some cases */
|
||||
uint8_t numImmediatesConsumed;
|
||||
uint8_t numImmediatesTranslated;
|
||||
uint64_t immediates[2];
|
||||
|
||||
/* A register or immediate operand encoded into the opcode */
|
||||
BOOL consumedOpcodeModifier;
|
||||
uint8_t opcodeModifier;
|
||||
Reg opcodeRegister;
|
||||
|
||||
/* Portions of the ModR/M byte */
|
||||
|
||||
/* These fields determine the allowable values for the ModR/M fields, which
|
||||
depend on operand and address widths */
|
||||
EABase eaBaseBase;
|
||||
EABase eaRegBase;
|
||||
Reg regBase;
|
||||
|
||||
/* The Mod and R/M fields can encode a base for an effective address, or a
|
||||
register. These are separated into two fields here */
|
||||
EABase eaBase;
|
||||
EADisplacement eaDisplacement;
|
||||
/* The reg field always encodes a register */
|
||||
Reg reg;
|
||||
|
||||
/* SIB state */
|
||||
SIBIndex sibIndex;
|
||||
uint8_t sibScale;
|
||||
SIBBase sibBase;
|
||||
};
|
||||
|
||||
/* decodeInstruction - Decode one instruction and store the decoding results in
|
||||
* a buffer provided by the consumer.
|
||||
* @param insn - The buffer to store the instruction in. Allocated by the
|
||||
* consumer.
|
||||
* @param reader - The byteReader_t for the bytes to be read.
|
||||
* @param readerArg - An argument to pass to the reader for storing context
|
||||
* specific to the consumer. May be NULL.
|
||||
* @param logger - The dlog_t to be used in printing status messages from the
|
||||
* disassembler. May be NULL.
|
||||
* @param loggerArg - An argument to pass to the logger for storing context
|
||||
* specific to the logger. May be NULL.
|
||||
* @param startLoc - The address (in the reader's address space) of the first
|
||||
* byte in the instruction.
|
||||
* @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
|
||||
* @return - Nonzero if there was an error during decode, 0 otherwise.
|
||||
*/
|
||||
int decodeInstruction(struct InternalInstruction* insn,
|
||||
byteReader_t reader,
|
||||
void* readerArg,
|
||||
dlog_t logger,
|
||||
void* loggerArg,
|
||||
uint64_t startLoc,
|
||||
DisassemblerMode mode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,354 @@
|
|||
/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
|
||||
*
|
||||
* The LLVM Compiler Infrastructure
|
||||
*
|
||||
* This file is distributed under the University of Illinois Open Source
|
||||
* License. See LICENSE.TXT for details.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*
|
||||
*
|
||||
* This file is part of the X86 Disassembler.
|
||||
* It contains common definitions used by both the disassembler and the table
|
||||
* generator.
|
||||
* Documentation for the disassembler can be found in X86Disassembler.h.
|
||||
*
|
||||
*===----------------------------------------------------------------------===*/
|
||||
|
||||
/*
|
||||
* This header file provides those definitions that need to be shared between
|
||||
* the decoder and the table generator in a C-friendly manner.
|
||||
*/
|
||||
|
||||
#ifndef X86DISASSEMBLERDECODERCOMMON_H
|
||||
#define X86DISASSEMBLERDECODERCOMMON_H
|
||||
|
||||
#include "llvm/System/DataTypes.h"
|
||||
|
||||
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
|
||||
#define CONTEXTS_SYM x86DisassemblerContexts
|
||||
#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
|
||||
#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
|
||||
#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
|
||||
#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
|
||||
|
||||
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
|
||||
#define CONTEXTS_STR "x86DisassemblerContexts"
|
||||
#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
|
||||
#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
|
||||
#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
|
||||
#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
|
||||
|
||||
/*
|
||||
* Attributes of an instruction that must be known before the opcode can be
|
||||
* processed correctly. Most of these indicate the presence of particular
|
||||
* prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
|
||||
*/
|
||||
#define ATTRIBUTE_BITS \
|
||||
ENUM_ENTRY(ATTR_NONE, 0x00) \
|
||||
ENUM_ENTRY(ATTR_64BIT, 0x01) \
|
||||
ENUM_ENTRY(ATTR_XS, 0x02) \
|
||||
ENUM_ENTRY(ATTR_XD, 0x04) \
|
||||
ENUM_ENTRY(ATTR_REXW, 0x08) \
|
||||
ENUM_ENTRY(ATTR_OPSIZE, 0x10)
|
||||
|
||||
#define ENUM_ENTRY(n, v) n = v,
|
||||
enum attributeBits {
|
||||
ATTRIBUTE_BITS
|
||||
ATTR_max
|
||||
};
|
||||
#undef ENUM_ENTRY
|
||||
|
||||
/*
|
||||
* Combinations of the above attributes that are relevant to instruction
|
||||
* decode. Although other combinations are possible, they can be reduced to
|
||||
* these without affecting the ultimately decoded instruction.
|
||||
*/
|
||||
|
||||
/* Class name Rank Rationale for rank assignment */
|
||||
#define INSTRUCTION_CONTEXTS \
|
||||
ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
|
||||
ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
|
||||
"64-bit mode but no more") \
|
||||
ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
|
||||
"operands change width") \
|
||||
ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
|
||||
"but not the operands") \
|
||||
ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
|
||||
"but not the operands") \
|
||||
ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
|
||||
"change width; overrides IC_OPSIZE") \
|
||||
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
|
||||
ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
|
||||
"secondary") \
|
||||
ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
|
||||
ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
|
||||
"opcode") \
|
||||
ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
|
||||
"IC_64BIT_REXW_XS") \
|
||||
ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
|
||||
"else because this changes most " \
|
||||
"operands' meaning")
|
||||
|
||||
#define ENUM_ENTRY(n, r, d) n,
|
||||
typedef enum {
|
||||
INSTRUCTION_CONTEXTS
|
||||
IC_max
|
||||
} InstructionContext;
|
||||
#undef ENUM_ENTRY
|
||||
|
||||
/*
|
||||
* Opcode types, which determine which decode table to use, both in the Intel
|
||||
* manual and also for the decoder.
|
||||
*/
|
||||
typedef enum {
|
||||
ONEBYTE = 0,
|
||||
TWOBYTE = 1,
|
||||
THREEBYTE_38 = 2,
|
||||
THREEBYTE_3A = 3
|
||||
} OpcodeType;
|
||||
|
||||
/*
|
||||
* The following structs are used for the hierarchical decode table. After
|
||||
* determining the instruction's class (i.e., which IC_* constant applies to
|
||||
* it), the decoder reads the opcode. Some instructions require specific
|
||||
* values of the ModR/M byte, so the ModR/M byte indexes into the final table.
|
||||
*
|
||||
* If a ModR/M byte is not required, "required" is left unset, and the values
|
||||
* for each instructionID are identical.
|
||||
*/
|
||||
|
||||
typedef uint16_t InstrUID;
|
||||
|
||||
/*
|
||||
* ModRMDecisionType - describes the type of ModR/M decision, allowing the
|
||||
* consumer to determine the number of entries in it.
|
||||
*
|
||||
* MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
|
||||
* instruction is the same.
|
||||
* MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
|
||||
* corresponds to one instruction; otherwise, it corresponds to
|
||||
* a different instruction.
|
||||
* MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
|
||||
* to a different instruction.
|
||||
*/
|
||||
|
||||
#define MODRMTYPES \
|
||||
ENUM_ENTRY(MODRM_ONEENTRY) \
|
||||
ENUM_ENTRY(MODRM_SPLITRM) \
|
||||
ENUM_ENTRY(MODRM_FULL)
|
||||
|
||||
#define ENUM_ENTRY(n) n,
|
||||
typedef enum {
|
||||
MODRMTYPES
|
||||
MODRM_max
|
||||
} ModRMDecisionType;
|
||||
#undef ENUM_ENTRY
|
||||
|
||||
/*
|
||||
* ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
|
||||
* instruction each possible value of the ModR/M byte corresponds to. Once
|
||||
* this information is known, we have narrowed down to a single instruction.
|
||||
*/
|
||||
struct ModRMDecision {
|
||||
uint8_t modrm_type;
|
||||
|
||||
/* The macro below must be defined wherever this file is included. */
|
||||
INSTRUCTION_IDS
|
||||
};
|
||||
|
||||
/*
|
||||
* OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
|
||||
* given a particular opcode.
|
||||
*/
|
||||
struct OpcodeDecision {
|
||||
struct ModRMDecision modRMDecisions[256];
|
||||
};
|
||||
|
||||
/*
|
||||
* ContextDecision - Specifies which opcode->instruction tables to look at given
|
||||
* a particular context (set of attributes). Since there are many possible
|
||||
* contexts, the decoder first uses CONTEXTS_SYM to determine which context
|
||||
* applies given a specific set of attributes. Hence there are only IC_max
|
||||
* entries in this table, rather than one per possible attribute mask.
|
||||
*/
|
||||
struct ContextDecision {
|
||||
struct OpcodeDecision opcodeDecisions[IC_max];
|
||||
};
|
||||
|
||||
/*
|
||||
* Physical encodings of instruction operands.
|
||||
*/
|
||||
|
||||
#define ENCODINGS \
|
||||
ENUM_ENTRY(ENCODING_NONE, "") \
|
||||
ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
|
||||
ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
|
||||
ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
|
||||
ENUM_ENTRY(ENCODING_CW, "2-byte") \
|
||||
ENUM_ENTRY(ENCODING_CD, "4-byte") \
|
||||
ENUM_ENTRY(ENCODING_CP, "6-byte") \
|
||||
ENUM_ENTRY(ENCODING_CO, "8-byte") \
|
||||
ENUM_ENTRY(ENCODING_CT, "10-byte") \
|
||||
ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
|
||||
ENUM_ENTRY(ENCODING_IW, "2-byte") \
|
||||
ENUM_ENTRY(ENCODING_ID, "4-byte") \
|
||||
ENUM_ENTRY(ENCODING_IO, "8-byte") \
|
||||
ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
|
||||
"the opcode byte") \
|
||||
ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
|
||||
ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
|
||||
ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
|
||||
ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
|
||||
"opcode byte") \
|
||||
\
|
||||
ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
|
||||
ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
|
||||
ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
|
||||
"opcode byte") \
|
||||
ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
|
||||
"in type")
|
||||
|
||||
#define ENUM_ENTRY(n, d) n,
|
||||
typedef enum {
|
||||
ENCODINGS
|
||||
ENCODING_max
|
||||
} OperandEncoding;
|
||||
#undef ENUM_ENTRY
|
||||
|
||||
/*
|
||||
* Semantic interpretations of instruction operands.
|
||||
*/
|
||||
|
||||
#define TYPES \
|
||||
ENUM_ENTRY(TYPE_NONE, "") \
|
||||
ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
|
||||
ENUM_ENTRY(TYPE_REL16, "2-byte") \
|
||||
ENUM_ENTRY(TYPE_REL32, "4-byte") \
|
||||
ENUM_ENTRY(TYPE_REL64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
|
||||
ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
|
||||
ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
|
||||
ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
|
||||
ENUM_ENTRY(TYPE_R16, "2-byte") \
|
||||
ENUM_ENTRY(TYPE_R32, "4-byte") \
|
||||
ENUM_ENTRY(TYPE_R64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
|
||||
ENUM_ENTRY(TYPE_IMM16, "2-byte") \
|
||||
ENUM_ENTRY(TYPE_IMM32, "4-byte") \
|
||||
ENUM_ENTRY(TYPE_IMM64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
|
||||
ENUM_ENTRY(TYPE_RM16, "2-byte") \
|
||||
ENUM_ENTRY(TYPE_RM32, "4-byte") \
|
||||
ENUM_ENTRY(TYPE_RM64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_M, "Memory operand") \
|
||||
ENUM_ENTRY(TYPE_M8, "1-byte") \
|
||||
ENUM_ENTRY(TYPE_M16, "2-byte") \
|
||||
ENUM_ENTRY(TYPE_M32, "4-byte") \
|
||||
ENUM_ENTRY(TYPE_M64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
|
||||
ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
|
||||
ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
|
||||
ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
|
||||
ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
|
||||
ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
|
||||
ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
|
||||
ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
|
||||
ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
|
||||
"base)") \
|
||||
ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
|
||||
ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
|
||||
ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
|
||||
"2 = SS, 3 = DS, 4 = FS, 5 = GS") \
|
||||
ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
|
||||
ENUM_ENTRY(TYPE_M64FP, "64-bit") \
|
||||
ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
|
||||
ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
|
||||
"floating-point instructions") \
|
||||
ENUM_ENTRY(TYPE_M32INT, "4-byte") \
|
||||
ENUM_ENTRY(TYPE_M64INT, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
|
||||
ENUM_ENTRY(TYPE_MM, "MMX register operand") \
|
||||
ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
|
||||
ENUM_ENTRY(TYPE_MM64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
|
||||
ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
|
||||
ENUM_ENTRY(TYPE_XMM64, "8-byte") \
|
||||
ENUM_ENTRY(TYPE_XMM128, "16-byte") \
|
||||
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
|
||||
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
|
||||
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
|
||||
ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \
|
||||
ENUM_ENTRY(TYPE_CR64, "8-byte") \
|
||||
\
|
||||
ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
|
||||
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
|
||||
ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
|
||||
ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
|
||||
ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
|
||||
ENUM_ENTRY(TYPE_DUP1, "operand 1") \
|
||||
ENUM_ENTRY(TYPE_DUP2, "operand 2") \
|
||||
ENUM_ENTRY(TYPE_DUP3, "operand 3") \
|
||||
ENUM_ENTRY(TYPE_DUP4, "operand 4") \
|
||||
ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
|
||||
|
||||
#define ENUM_ENTRY(n, d) n,
|
||||
typedef enum {
|
||||
TYPES
|
||||
TYPE_max
|
||||
} OperandType;
|
||||
#undef ENUM_ENTRY
|
||||
|
||||
/*
|
||||
* OperandSpecifier - The specification for how to extract and interpret one
|
||||
* operand.
|
||||
*/
|
||||
struct OperandSpecifier {
|
||||
OperandEncoding encoding;
|
||||
OperandType type;
|
||||
};
|
||||
|
||||
/*
|
||||
* Indicates where the opcode modifier (if any) is to be found. Extended
|
||||
* opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
|
||||
*/
|
||||
|
||||
#define MODIFIER_TYPES \
|
||||
ENUM_ENTRY(MODIFIER_NONE) \
|
||||
ENUM_ENTRY(MODIFIER_OPCODE) \
|
||||
ENUM_ENTRY(MODIFIER_MODRM)
|
||||
|
||||
#define ENUM_ENTRY(n) n,
|
||||
typedef enum {
|
||||
MODIFIER_TYPES
|
||||
MODIFIER_max
|
||||
} ModifierType;
|
||||
#undef ENUM_ENTRY
|
||||
|
||||
#define X86_MAX_OPERANDS 5
|
||||
|
||||
/*
|
||||
* The specification for how to extract and interpret a full instruction and
|
||||
* its operands.
|
||||
*/
|
||||
struct InstructionSpecifier {
|
||||
ModifierType modifierType;
|
||||
uint8_t modifierBase;
|
||||
struct OperandSpecifier operands[X86_MAX_OPERANDS];
|
||||
|
||||
/* The macro below must be defined wherever this file is included. */
|
||||
INSTRUCTION_SPECIFIER_FIELDS
|
||||
};
|
||||
|
||||
/*
|
||||
* Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
|
||||
* are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
|
||||
* respectively.
|
||||
*/
|
||||
typedef enum {
|
||||
MODE_16BIT,
|
||||
MODE_32BIT,
|
||||
MODE_64BIT
|
||||
} DisassemblerMode;
|
||||
|
||||
#endif
|
|
@ -15,8 +15,8 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
|
|||
X86GenRegisterInfo.inc X86GenInstrNames.inc \
|
||||
X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
|
||||
X86GenAsmWriter1.inc X86GenDAGISel.inc \
|
||||
X86GenFastISel.inc \
|
||||
X86GenCallingConv.inc X86GenSubtarget.inc
|
||||
X86GenDisassemblerTables.inc X86GenFastISel.inc \
|
||||
X86GenCallingConv.inc X86GenSubtarget.inc \
|
||||
|
||||
DIRS = AsmPrinter AsmParser Disassembler TargetInfo
|
||||
|
||||
|
|
|
@ -38,6 +38,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
|
|||
}
|
||||
}
|
||||
|
||||
extern "C" void LLVMInitializeX86Disassembler();
|
||||
|
||||
extern "C" void LLVMInitializeX86Target() {
|
||||
// Register the target.
|
||||
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
|
||||
|
@ -47,6 +49,8 @@ extern "C" void LLVMInitializeX86Target() {
|
|||
RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
|
||||
RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
|
||||
|
||||
LLVMInitializeX86Disassembler();
|
||||
|
||||
// Register the code emitter.
|
||||
TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);
|
||||
TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter);
|
||||
|
|
|
@ -23,6 +23,8 @@ add_executable(tblgen
|
|||
TGValueTypes.cpp
|
||||
TableGen.cpp
|
||||
TableGenBackend.cpp
|
||||
X86DisassemblerTables.cpp
|
||||
X86RecognizableInstr.cpp
|
||||
)
|
||||
|
||||
# Link the tblgen executable against the LLVMSupport and LLVMSystem libraries.
target_link_libraries(tblgen LLVMSupport LLVMSystem)
|
||||
|
|
|
@ -10,7 +10,86 @@
|
|||
#include "DisassemblerEmitter.h"
|
||||
#include "CodeGenTarget.h"
|
||||
#include "Record.h"
|
||||
#include "X86DisassemblerTables.h"
|
||||
#include "X86RecognizableInstr.h"
|
||||
using namespace llvm;
|
||||
using namespace llvm::X86Disassembler;
|
||||
|
||||
/// DisassemblerEmitter - Contains disassembler table emitters for various
|
||||
/// architectures.
|
||||
|
||||
/// X86 Disassembler Emitter
|
||||
///
|
||||
/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
|
||||
/// THE END OF THIS COMMENT!
|
||||
///
|
||||
/// The X86 disassembler emitter is part of the X86 Disassembler, which is
|
||||
/// documented in lib/Target/X86/Disassembler/X86Disassembler.h.
|
||||
///
|
||||
/// The emitter produces the tables that the disassembler uses to translate
|
||||
/// instructions. The emitter generates the following tables:
|
||||
///
|
||||
/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
|
||||
/// instruction contexts. Although for each attribute there are cases where
|
||||
/// that attribute determines decoding, in the majority of cases decoding is
|
||||
/// the same whether or not an attribute is present. For example, a 64-bit
|
||||
/// instruction with an OPSIZE prefix and an XS prefix decodes the same way in
|
||||
/// all cases as a 64-bit instruction with only OPSIZE set. (The XS prefix
|
||||
/// may have effects on its execution, but does not change the instruction
|
||||
/// returned.) This allows considerable space savings in other tables.
|
||||
/// - Four tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and
|
||||
/// THREEBYTE3A_SYM) contain the hierarchy that the decoder traverses while
|
||||
/// decoding an instruction. At the lowest level of this hierarchy are
|
||||
/// instruction UIDs, 16-bit integers that can be used to uniquely identify
|
||||
/// the instruction and correspond exactly to its position in the list of
|
||||
/// CodeGenInstructions for the target.
|
||||
/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
|
||||
/// each instruction and how to decode them.
|
||||
///
|
||||
/// During table generation, there may be conflicts between instructions that
|
||||
/// occupy the same space in the decode tables. These conflicts are resolved as
|
||||
/// follows in setTableFields() (X86DisassemblerTables.cpp)
|
||||
///
|
||||
/// - If the current context is the native context for one of the instructions
|
||||
/// (that is, the attributes specified for it in the LLVM tables specify
|
||||
/// precisely the current context), then it has priority.
|
||||
/// - If the current context isn't native for either of the instructions, then
|
||||
/// the higher-priority context wins (that is, the one that is more specific).
|
||||
/// That hierarchy is determined by outranks() (X86DisassemblerTables.cpp)
|
||||
/// - If the current context is native for both instructions, then the table
|
||||
/// emitter reports a conflict and dies.
|
||||
///
|
||||
/// *** RESOLUTION FOR "Primary decode conflict"S
|
||||
///
|
||||
/// If two instructions collide, typically the solution is (in order of
|
||||
/// likelihood):
|
||||
///
|
||||
/// (1) to filter out one of the instructions by editing filter()
|
||||
/// (X86RecognizableInstr.cpp). This is the most common resolution, but
|
||||
/// check the Intel manuals first to make sure that (2) and (3) are not the
|
||||
/// problem.
|
||||
/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
|
||||
/// accurate. Sometimes they are not.
|
||||
/// (3) to fix the tables to reflect the actual context (for example, required
|
||||
/// prefixes), and possibly to add a new context by editing
|
||||
/// lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h. This is
/// unlikely to be
|
||||
/// the cause.
|
||||
///
|
||||
/// DisassemblerEmitter.cpp contains the implementation for the emitter,
|
||||
/// which simply pulls out instructions from the CodeGenTarget and pushes them
|
||||
/// into X86DisassemblerTables.
|
||||
/// X86DisassemblerTables.h contains the interface for the instruction tables,
|
||||
/// which manage and emit the structures discussed above.
|
||||
/// X86DisassemblerTables.cpp contains the implementation for the instruction
|
||||
/// tables.
|
||||
/// X86ModRMFilters.h contains filters that can be used to determine which
|
||||
/// ModR/M values are valid for a particular instruction. These are used to
|
||||
/// populate ModRMDecisions.
|
||||
/// X86RecognizableInstr.h contains the interface for a single instruction,
|
||||
/// which knows how to translate itself from a CodeGenInstruction and provide
|
||||
/// the information necessary for integration into the tables.
|
||||
/// X86RecognizableInstr.cpp contains the implementation for a single
|
||||
/// instruction.
|
||||
|
||||
void DisassemblerEmitter::run(raw_ostream &OS) {
|
||||
CodeGenTarget Target;
|
||||
|
@ -25,6 +104,26 @@ void DisassemblerEmitter::run(raw_ostream &OS) {
|
|||
<< " *===---------------------------------------------------------------"
|
||||
<< "-------===*/\n";
|
||||
|
||||
// X86 uses a custom disassembler.
|
||||
if (Target.getName() == "X86") {
|
||||
DisassemblerTables Tables;
|
||||
|
||||
std::vector<const CodeGenInstruction*> numberedInstructions;
|
||||
Target.getInstructionsByEnumValue(numberedInstructions);
|
||||
|
||||
for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
|
||||
RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
|
||||
|
||||
// FIXME: As long as we are using exceptions, might as well drop this to the
|
||||
// actual conflict site.
|
||||
if (Tables.hasConflicts())
|
||||
throw TGError(Target.getTargetRecord()->getLoc(),
|
||||
"Primary decode conflict");
|
||||
|
||||
Tables.emit(OS);
|
||||
return;
|
||||
}
|
||||
|
||||
throw TGError(Target.getTargetRecord()->getLoc(),
|
||||
"Unable to generate disassembler for this target");
|
||||
}
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
//===- X86DisassemblerShared.h - Emitter shared header ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef X86DISASSEMBLERSHARED_H
|
||||
#define X86DISASSEMBLERSHARED_H
|
||||
|
||||
#include <string.h>
#include <string>

/// INSTRUCTION_SPECIFIER_FIELDS - Emitter-only members injected into the
/// instruction specifier declared by X86DisassemblerDecoderCommon.h (the macro
/// is defined just before that header is included and undefined right after).
/// It adds:
///   - filtered:    whether the instruction was filtered out;
///   - insnContext: the instruction's context (defaults to IC);
///   - name:        the instruction's name (defaults to the empty string);
/// plus a default constructor that also resets modifierType, modifierBase and
/// zero-fills the operands array, which are presumably declared by the shared
/// header itself.
///
/// memset() is used rather than bzero(): bzero() is a legacy BSD/POSIX call
/// that is not available on every host toolchain, and no header declaring it
/// was included here.
#define INSTRUCTION_SPECIFIER_FIELDS                \
  bool                    filtered;                 \
  InstructionContext      insnContext;              \
  std::string             name;                     \
                                                    \
  InstructionSpecifier() {                          \
    filtered = false;                               \
    insnContext = IC;                               \
    name = "";                                      \
    modifierType = MODIFIER_NONE;                   \
    modifierBase = 0;                               \
    memset(operands, 0, sizeof(operands));          \
  }

/// INSTRUCTION_IDS - Storage for one instruction UID per possible ModR/M byte
/// value (256 of them); the emitter always keeps the full, uncompacted table.
#define INSTRUCTION_IDS           \
  InstrUID instructionIDs[256];
|
||||
|
||||
#include "../../lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h"
|
||||
|
||||
#undef INSTRUCTION_SPECIFIER_FIELDS
|
||||
#undef INSTRUCTION_IDS
|
||||
|
||||
#endif
|
|
@ -0,0 +1,603 @@
|
|||
//===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is part of the X86 Disassembler Emitter.
|
||||
// It contains the implementation of the disassembler tables.
|
||||
// Documentation for the disassembler emitter in general can be found in
|
||||
// DisassemblerEmitter.cpp.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86DisassemblerShared.h"
|
||||
#include "X86DisassemblerTables.h"
|
||||
|
||||
#include "TableGenBackend.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
|
||||
#include <cstring>
#include <string>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace X86Disassembler;
|
||||
|
||||
/// inheritsFrom - Indicates whether all instructions in one class also belong
|
||||
/// to another class.
|
||||
///
|
||||
/// @param child - The class that may be the subset
|
||||
/// @param parent - The class that may be the superset
|
||||
/// @return - True if child is a subset of parent, false otherwise.
|
||||
static inline bool inheritsFrom(InstructionContext child,
|
||||
InstructionContext parent) {
|
||||
if (child == parent)
|
||||
return true;
|
||||
|
||||
switch (parent) {
|
||||
case IC:
|
||||
return true;
|
||||
case IC_64BIT:
|
||||
return(inheritsFrom(child, IC_64BIT_REXW) ||
|
||||
inheritsFrom(child, IC_64BIT_OPSIZE) ||
|
||||
inheritsFrom(child, IC_64BIT_XD) ||
|
||||
inheritsFrom(child, IC_64BIT_XS));
|
||||
case IC_OPSIZE:
|
||||
return(inheritsFrom(child, IC_64BIT_OPSIZE));
|
||||
case IC_XD:
|
||||
return(inheritsFrom(child, IC_64BIT_XD));
|
||||
case IC_XS:
|
||||
return(inheritsFrom(child, IC_64BIT_XS));
|
||||
case IC_64BIT_REXW:
|
||||
return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
|
||||
inheritsFrom(child, IC_64BIT_REXW_XD) ||
|
||||
inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
|
||||
case IC_64BIT_OPSIZE:
|
||||
return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
|
||||
case IC_64BIT_XD:
|
||||
return(inheritsFrom(child, IC_64BIT_REXW_XD));
|
||||
case IC_64BIT_XS:
|
||||
return(inheritsFrom(child, IC_64BIT_REXW_XS));
|
||||
case IC_64BIT_REXW_XD:
|
||||
return false;
|
||||
case IC_64BIT_REXW_XS:
|
||||
return false;
|
||||
case IC_64BIT_REXW_OPSIZE:
|
||||
return false;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// outranks - Indicates whether, if an instruction has two different applicable
|
||||
/// classes, which class should be preferred when performing decode. This
|
||||
/// imposes a total ordering (ties are resolved toward "lower")
|
||||
///
|
||||
/// @param upper - The class that may be preferable
|
||||
/// @param lower - The class that may be less preferable
|
||||
/// @return - True if upper is to be preferred, false otherwise.
|
||||
static inline bool outranks(InstructionContext upper,
|
||||
InstructionContext lower) {
|
||||
assert(upper < IC_max);
|
||||
assert(lower < IC_max);
|
||||
|
||||
#define ENUM_ENTRY(n, r, d) r,
|
||||
static int ranks[IC_max] = {
|
||||
INSTRUCTION_CONTEXTS
|
||||
};
|
||||
#undef ENUM_ENTRY
|
||||
|
||||
return (ranks[upper] > ranks[lower]);
|
||||
}
|
||||
|
||||
/// stringForContext - Returns a string containing the name of a particular
|
||||
/// InstructionContext, usually for diagnostic purposes.
|
||||
///
|
||||
/// @param insnContext - The instruction class to transform to a string.
|
||||
/// @return - A statically-allocated string constant that contains the
|
||||
/// name of the instruction class.
|
||||
static inline const char* stringForContext(InstructionContext insnContext) {
|
||||
switch (insnContext) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled instruction class");
|
||||
#define ENUM_ENTRY(n, r, d) case n: return #n; break;
|
||||
INSTRUCTION_CONTEXTS
|
||||
#undef ENUM_ENTRY
|
||||
}
|
||||
}
|
||||
|
||||
/// stringForOperandType - Like stringForContext, but for OperandTypes.
|
||||
static inline const char* stringForOperandType(OperandType type) {
|
||||
switch (type) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled type");
|
||||
#define ENUM_ENTRY(i, d) case i: return #i;
|
||||
TYPES
|
||||
#undef ENUM_ENTRY
|
||||
}
|
||||
}
|
||||
|
||||
/// stringForOperandEncoding - like stringForContext, but for
|
||||
/// OperandEncodings.
|
||||
static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
|
||||
switch (encoding) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled encoding");
|
||||
#define ENUM_ENTRY(i, d) case i: return #i;
|
||||
ENCODINGS
|
||||
#undef ENUM_ENTRY
|
||||
}
|
||||
}
|
||||
|
||||
// Writes one line of an ID table: the indented ID (hexadecimal, or a plain 0
// for the null ID), then ", " or two spaces depending on addComma, then the
// instruction's name inside a C comment.
void DisassemblerTables::emitOneID(raw_ostream &o,
                                   uint32_t &i,
                                   InstrUID id,
                                   bool addComma) const {
  o.indent(i * 2);

  if (id != 0)
    o << format("0x%hx", id);
  else
    o << 0;

  // Pad with spaces when there is no comma so the comments stay aligned.
  o << (addComma ? ", " : "  ");

  o << "/* " << InstructionSpecifiers[id].name << "*/" << "\n";
}
|
||||
|
||||
/// emitEmptyTable - Emits the modRMEmptyTable, which is used as a ID table by
|
||||
/// all ModR/M decisions for instructions that are invalid for all possible
|
||||
/// ModR/M byte values.
|
||||
///
|
||||
/// @param o - The output stream on which to emit the table.
|
||||
/// @param i - The indentation level for that output stream.
|
||||
static void emitEmptyTable(raw_ostream &o, uint32_t &i)
|
||||
{
|
||||
o.indent(i * 2) << "InstrUID modRMEmptyTable[1] = { 0 };" << "\n";
|
||||
o << "\n";
|
||||
}
|
||||
|
||||
/// getDecisionType - Determines whether a ModRM decision with 255 entries can
|
||||
/// be compacted by eliminating redundant information.
|
||||
///
|
||||
/// @param decision - The decision to be compacted.
|
||||
/// @return - The compactest available representation for the decision.
|
||||
static ModRMDecisionType getDecisionType(ModRMDecision &decision)
|
||||
{
|
||||
bool satisfiesOneEntry = true;
|
||||
bool satisfiesSplitRM = true;
|
||||
|
||||
uint16_t index;
|
||||
|
||||
for (index = 0; index < 256; ++index) {
|
||||
if (decision.instructionIDs[index] != decision.instructionIDs[0])
|
||||
satisfiesOneEntry = false;
|
||||
|
||||
if (((index & 0xc0) == 0xc0) &&
|
||||
(decision.instructionIDs[index] != decision.instructionIDs[0xc0]))
|
||||
satisfiesSplitRM = false;
|
||||
|
||||
if (((index & 0xc0) != 0xc0) &&
|
||||
(decision.instructionIDs[index] != decision.instructionIDs[0x00]))
|
||||
satisfiesSplitRM = false;
|
||||
}
|
||||
|
||||
if (satisfiesOneEntry)
|
||||
return MODRM_ONEENTRY;
|
||||
|
||||
if (satisfiesSplitRM)
|
||||
return MODRM_SPLITRM;
|
||||
|
||||
return MODRM_FULL;
|
||||
}
|
||||
|
||||
/// stringForDecisionType - Returns a statically-allocated string corresponding
|
||||
/// to a particular decision type.
|
||||
///
|
||||
/// @param dt - The decision type.
|
||||
/// @return - A pointer to the statically-allocated string (e.g.,
|
||||
/// "MODRM_ONEENTRY" for MODRM_ONEENTRY).
|
||||
static const char* stringForDecisionType(ModRMDecisionType dt)
|
||||
{
|
||||
#define ENUM_ENTRY(n) case n: return #n;
|
||||
switch (dt) {
|
||||
default:
|
||||
llvm_unreachable("Unknown decision type");
|
||||
MODRMTYPES
|
||||
};
|
||||
#undef ENUM_ENTRY
|
||||
}
|
||||
|
||||
/// stringForModifierType - Returns a statically-allocated string corresponding
|
||||
/// to an opcode modifier type.
|
||||
///
|
||||
/// @param mt - The modifier type.
|
||||
/// @return - A pointer to the statically-allocated string (e.g.,
|
||||
/// "MODIFIER_NONE" for MODIFIER_NONE).
|
||||
static const char* stringForModifierType(ModifierType mt)
|
||||
{
|
||||
#define ENUM_ENTRY(n) case n: return #n;
|
||||
switch(mt) {
|
||||
default:
|
||||
llvm_unreachable("Unknown modifier type");
|
||||
MODIFIER_TYPES
|
||||
};
|
||||
#undef ENUM_ENTRY
|
||||
}
|
||||
|
||||
// Allocates one ContextDecision per opcode type on the heap and clears each
// of them to all-zero bytes, so that every table slot starts out holding the
// null instruction ID.  Also resets the conflict flag.
//
// std::memset() is used instead of bzero(): bzero() is a legacy BSD/POSIX
// call that some host toolchains do not provide.
DisassemblerTables::DisassemblerTables() {
  const unsigned numTables = sizeof(Tables) / sizeof(Tables[0]);

  for (unsigned i = 0; i < numTables; i++) {
    Tables[i] = new ContextDecision;
    std::memset(Tables[i], 0, sizeof(ContextDecision));
  }

  HasConflicts = false;
}
|
||||
|
||||
// Releases the four ContextDecisions allocated by the constructor.
DisassemblerTables::~DisassemblerTables() {
  for (unsigned table = 0; table != 4; ++table)
    delete Tables[table];
}
|
||||
|
||||
// Emits one ModR/M decision.  The decision structure itself goes to o2; the
// ID table it refers to goes to o1, unless the decision is entirely empty, in
// which case it simply refers to the shared modRMEmptyTable and nothing is
// written to o1.
void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
                                           raw_ostream &o2,
                                           uint32_t &i1,
                                           uint32_t &i2,
                                           ModRMDecision &decision)
  const {
  // Running counter that gives every emitted ID table a unique name.
  static uint64_t sTableNumber = 0;

  const uint64_t thisTableNumber = sTableNumber;
  const ModRMDecisionType dt = getDecisionType(decision);
  const InstrUID *ids = decision.instructionIDs;

  // A decision with no instruction at all shares the empty table and does not
  // consume a table number.
  if (dt == MODRM_ONEENTRY && ids[0] == 0) {
    o2.indent(i2) << "{ /* ModRMDecision */" << "\n";
    ++i2;

    o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
    o2.indent(i2) << "modRMEmptyTable";

    --i2;
    o2.indent(i2) << "}";
    return;
  }

  // --- ID table (stream o1) ---
  o1.indent(i1) << "InstrUID modRMTable" << thisTableNumber;

  // The array extent depends on how far the decision could be compacted.
  const char *extent = 0;
  switch (dt) {
  default:
    llvm_unreachable("Unknown decision type");
  case MODRM_ONEENTRY:
    extent = "[1]";
    break;
  case MODRM_SPLITRM:
    extent = "[2]";
    break;
  case MODRM_FULL:
    extent = "[256]";
    break;
  }

  o1 << extent << " = {" << "\n";
  ++i1;

  // dt is known to be one of the three valid types at this point.
  if (dt == MODRM_FULL) {
    for (uint16_t modRM = 0; modRM < 256; ++modRM)
      emitOneID(o1, i1, ids[modRM], modRM < 255);
  } else if (dt == MODRM_SPLITRM) {
    emitOneID(o1, i1, ids[0x00], true);  // mod = 0b00
    emitOneID(o1, i1, ids[0xc0], false); // mod = 0b11
  } else {
    emitOneID(o1, i1, ids[0], false);
  }

  --i1;
  o1.indent(i1) << "};" << "\n";
  o1 << "\n";

  // --- Decision structure (stream o2) ---
  o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n";
  ++i2;

  o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
  o2.indent(i2) << "modRMTable" << thisTableNumber << "\n";

  --i2;
  o2.indent(i2) << "}";

  ++sTableNumber;
}
|
||||
|
||||
// Emits an OpcodeDecision to o2: one ModR/M decision per opcode byte, each
// preceded by a comment giving the opcode in hexadecimal.  o1 and i1 are only
// forwarded to emitModRMDecision() for the ID tables.
void DisassemblerTables::emitOpcodeDecision(
  raw_ostream &o1,
  raw_ostream &o2,
  uint32_t &i1,
  uint32_t &i2,
  OpcodeDecision &decision) const {
  o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n";
  ++i2;
  o2.indent(i2) << "{" << "\n";
  ++i2;

  for (uint16_t opcode = 0; opcode < 256; ++opcode) {
    o2.indent(i2);
    o2 << "/* 0x" << format("%02hhx", opcode) << " */" << "\n";

    emitModRMDecision(o1, o2, i1, i2, decision.modRMDecisions[opcode]);

    // Every element but the last is followed by a separator.
    const bool isLast = !(opcode < 255);
    if (!isLast)
      o2 << ",";
    o2 << "\n";
  }

  --i2;
  o2.indent(i2) << "}" << "\n";
  --i2;
  o2.indent(i2) << "}" << "\n";
}
|
||||
|
||||
// Emits a named ContextDecision to o2: one OpcodeDecision per instruction
// context, each preceded by a comment naming the context.  o1 and i1 are only
// forwarded for the ID tables generated further down.
void DisassemblerTables::emitContextDecision(
  raw_ostream &o1,
  raw_ostream &o2,
  uint32_t &i1,
  uint32_t &i2,
  ContextDecision &decision,
  const char* name) const {
  o2.indent(i2) << "struct ContextDecision " << name << " = {" << "\n";
  ++i2;
  o2.indent(i2) << "{ /* opcodeDecisions */" << "\n";
  ++i2;

  for (unsigned context = 0; context < IC_max; ++context) {
    const char *contextName = stringForContext((InstructionContext)context);
    o2.indent(i2) << "/* " << contextName << " */" << "\n";

    emitOpcodeDecision(o1, o2, i1, i2, decision.opcodeDecisions[context]);

    // Separate this element from the next one, if there is a next one.
    if (context + 1 < IC_max)
      o2 << ", ";
  }

  --i2;
  o2.indent(i2) << "}" << "\n";
  --i2;
  o2.indent(i2) << "};" << "\n";
}
|
||||
|
||||
// Emits the instruction specifier table: one element per known instruction,
// consisting of its modifier type, its modifier base in hexadecimal, the
// { encoding, type } pair of each of its X86_MAX_OPERANDS operands, and
// finally its name as a string literal.
void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
  const {
  const uint16_t numInstructions = InstructionSpecifiers.size();

  o.indent(i * 2) << "struct InstructionSpecifier " << INSTRUCTIONS_STR << "["
                  << InstructionSpecifiers.size() << "] = {" << "\n";
  ++i;

  for (uint16_t index = 0; index < numInstructions; ++index) {
    const InstructionSpecifier &spec = InstructionSpecifiers[index];

    o.indent(i * 2) << "{ /* " << index << " */" << "\n";
    ++i;

    o.indent(i * 2) << stringForModifierType(spec.modifierType)
                    << "," << "\n";

    o.indent(i * 2) << "0x" << format("%02hhx", (uint16_t)spec.modifierBase)
                    << "," << "\n";

    // Operand list.
    o.indent(i * 2) << "{" << "\n";
    ++i;

    for (uint16_t operandIndex = 0;
         operandIndex < X86_MAX_OPERANDS;
         ++operandIndex) {
      o.indent(i * 2) << "{ "
                      << stringForOperandEncoding(
                           spec.operands[operandIndex].encoding)
                      << ", "
                      << stringForOperandType(spec.operands[operandIndex].type)
                      << " }";

      if (operandIndex < X86_MAX_OPERANDS - 1)
        o << ",";

      o << "\n";
    }

    --i;
    o.indent(i * 2) << "}," << "\n";

    o.indent(i * 2) << "\"" << spec.name << "\"" << "\n";

    --i;
    o.indent(i * 2) << "}";

    // Every element but the last is followed by a separator.
    if (index + 1 < numInstructions)
      o << ",";

    o << "\n";
  }

  --i;
  o.indent(i * 2) << "};" << "\n";
}
|
||||
|
||||
// Emits the 256-entry table that translates an attribute mask (a combination
// of the ATTR_* bits) into the instruction context to decode with.  When
// several attributes are set, the tests below are tried in order and the
// first match wins.
void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
  o.indent(i * 2) << "InstructionContext " << CONTEXTS_STR << "[256] = {"
                  << "\n";
  ++i;

  for (uint16_t mask = 0; mask < 256; ++mask) {
    const bool is64Bit   = (mask & ATTR_64BIT) != 0;
    const bool hasRexW   = (mask & ATTR_REXW) != 0;
    const bool hasXS     = (mask & ATTR_XS) != 0;
    const bool hasXD     = (mask & ATTR_XD) != 0;
    const bool hasOpSize = (mask & ATTR_OPSIZE) != 0;

    const char *context;

    if (is64Bit && hasRexW && hasXS)
      context = "IC_64BIT_REXW_XS";
    else if (is64Bit && hasRexW && hasXD)
      context = "IC_64BIT_REXW_XD";
    else if (is64Bit && hasRexW && hasOpSize)
      context = "IC_64BIT_REXW_OPSIZE";
    else if (is64Bit && hasXS)
      context = "IC_64BIT_XS";
    else if (is64Bit && hasXD)
      context = "IC_64BIT_XD";
    else if (is64Bit && hasOpSize)
      context = "IC_64BIT_OPSIZE";
    else if (is64Bit && hasRexW)
      context = "IC_64BIT_REXW";
    else if (is64Bit)
      context = "IC_64BIT";
    else if (hasXS)
      context = "IC_XS";
    else if (hasXD)
      context = "IC_XD";
    else if (hasOpSize)
      context = "IC_OPSIZE";
    else
      context = "IC";

    o.indent(i * 2);
    o << context;

    // The last element gets a space in place of the separator.
    o << (mask < 255 ? "," : " ");

    o << " /* " << mask << " */" << "\n";
  }

  --i;
  o.indent(i * 2) << "};" << "\n";
}
|
||||
|
||||
// Emits the four ContextDecisions, one per opcode type, each under the symbol
// name given by the corresponding *_STR macro.
void DisassemblerTables::emitContextDecisions(raw_ostream &o1,
                                              raw_ostream &o2,
                                              uint32_t &i1,
                                              uint32_t &i2)
  const {
  // Symbol names, in the same order as the entries of Tables.
  const char *const names[4] = {
    ONEBYTE_STR,
    TWOBYTE_STR,
    THREEBYTE38_STR,
    THREEBYTE3A_STR
  };

  for (unsigned table = 0; table != 4; ++table)
    emitContextDecision(o1, o2, i1, i2, *Tables[table], names[table]);
}
|
||||
|
||||
// Emits everything, in this order: the instruction specifier table, the
// context table, all ModR/M ID tables, and finally the decision structures
// that refer to those ID tables.  The ID tables and the decision structures
// are generated in a single pass, so both are buffered in strings and written
// out afterwards in the required order.
void DisassemblerTables::emit(raw_ostream &o) const {
  uint32_t idTableIndent = 0;
  uint32_t decisionIndent = 0;

  emitInstructionInfo(o, decisionIndent);
  o << "\n";

  emitContextTable(o, decisionIndent);
  o << "\n";

  std::string idTableText;
  std::string decisionText;

  raw_string_ostream idTables(idTableText);
  raw_string_ostream decisions(decisionText);

  emitEmptyTable(idTables, idTableIndent);
  emitContextDecisions(idTables, decisions, idTableIndent, decisionIndent);

  o << idTables.str() << "\n";
  o << decisions.str() << "\n" << "\n";
}
|
||||
|
||||
// Points every ModR/M slot accepted by the filter at instruction uid.  When a
// slot is already taken by a different instruction, the existing entry is
// kept if any of the following holds:
//   - the new instruction is filtered;
//   - the existing instruction is NOOP;
//   - the existing instruction's context outranks the new one's.
// Otherwise the slot is overwritten.  If, in addition, both instructions have
// the same context and the existing one is not filtered, the overwrite is
// reported on errs() as a primary decode conflict and HasConflicts is set.
void DisassemblerTables::setTableFields(ModRMDecision &decision,
                                        const ModRMFilter &filter,
                                        InstrUID uid,
                                        uint8_t opcode) {
  for (unsigned index = 0; index < 256; ++index) {
    if (!filter.accepts(index))
      continue;

    InstrUID &slot = decision.instructionIDs[index];

    // Already pointing at this instruction: nothing to do.
    if (slot == uid)
      continue;

    if (slot != 0) {
      InstructionSpecifier &newInfo = InstructionSpecifiers[uid];
      InstructionSpecifier &previousInfo = InstructionSpecifiers[slot];

      const bool keepPrevious =
        newInfo.filtered ||               // filtered instructions get lowest
                                          // priority
        previousInfo.name == "NOOP" ||    // special case for XCHG32ar and NOOP
        outranks(previousInfo.insnContext, newInfo.insnContext);

      if (keepPrevious)
        continue;

      const bool sameContext = previousInfo.insnContext == newInfo.insnContext;

      if (sameContext && !previousInfo.filtered) {
        errs() << "Error: Primary decode conflict: "
               << newInfo.name << " would overwrite " << previousInfo.name
               << "\n";
        errs() << "ModRM " << index << "\n";
        errs() << "Opcode " << (uint16_t)opcode << "\n";
        errs() << "Context " << stringForContext(newInfo.insnContext) << "\n";
        HasConflicts = true;
      }
    }

    slot = uid;
  }
}
|
||||
|
||||
// Registers instruction uid for the given opcode in the table of the given
// opcode type, in every context that inherits from the instruction's own
// context (see inheritsFrom()).
//
// NOTE(review): the insnContext parameter is not consulted here; the context
// recorded in InstructionSpecifiers[uid] is used instead.  Presumably callers
// pass the same value -- confirm against the call sites.
void DisassemblerTables::setTableFields(OpcodeType type,
                                        InstructionContext insnContext,
                                        uint8_t opcode,
                                        const ModRMFilter &filter,
                                        InstrUID uid) {
  ContextDecision &decision = *Tables[type];
  const InstructionContext nativeContext =
    InstructionSpecifiers[uid].insnContext;

  for (unsigned context = 0; context < IC_max; ++context) {
    if (!inheritsFrom((InstructionContext)context, nativeContext))
      continue;

    ModRMDecision &slot =
      decision.opcodeDecisions[context].modRMDecisions[opcode];

    setTableFields(slot, filter, uid, opcode);
  }
}
|
|
@ -0,0 +1,291 @@
|
|||
//===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is part of the X86 Disassembler Emitter.
|
||||
// It contains the interface of the disassembler tables.
|
||||
// Documentation for the disassembler emitter in general can be found in
|
||||
// DisassemblerEmitter.cpp.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef X86DISASSEMBLERTABLES_H
|
||||
#define X86DISASSEMBLERTABLES_H
|
||||
|
||||
#include "X86DisassemblerShared.h"
|
||||
#include "X86ModRMFilters.h"
|
||||
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace X86Disassembler {
|
||||
|
||||
/// DisassemblerTables - Encapsulates all the decode tables being generated by
|
||||
/// the table emitter. Contains functions to populate the tables as well as
|
||||
/// to emit them as hierarchical C structures suitable for consumption by the
|
||||
/// runtime.
|
||||
class DisassemblerTables {
|
||||
private:
|
||||
/// The decoder tables. There is one for each opcode type:
|
||||
/// [0] one-byte opcodes
|
||||
/// [1] two-byte opcodes of the form 0f __
|
||||
/// [2] three-byte opcodes of the form 0f 38 __
|
||||
/// [3] three-byte opcodes of the form 0f 3a __
|
||||
ContextDecision* Tables[4];
|
||||
|
||||
/// The instruction information table
|
||||
std::vector<InstructionSpecifier> InstructionSpecifiers;
|
||||
|
||||
/// True if there are primary decode conflicts in the instruction set
|
||||
bool HasConflicts;
|
||||
|
||||
/// emitOneID - Emits a table entry for a single instruction entry, at the
|
||||
/// innermost level of the structure hierarchy. The entry is printed out
|
||||
/// in the format "nnnn, /* MNEMONIC */" where nnnn is the ID in decimal,
|
||||
/// the comma is printed if addComma is true, and the menonic is the name
|
||||
/// of the instruction as listed in the LLVM tables.
|
||||
///
|
||||
/// @param o - The output stream to print the entry on.
|
||||
/// @param i - The indentation level for o.
|
||||
/// @param id - The unique ID of the instruction to print.
|
||||
/// @param addComma - Whether or not to print a comma after the ID. True if
|
||||
/// additional items will follow.
|
||||
void emitOneID(raw_ostream &o,
|
||||
uint32_t &i,
|
||||
InstrUID id,
|
||||
bool addComma) const;
|
||||
|
||||
/// emitModRMDecision - Emits a table of entries corresponding to a single
|
||||
/// ModR/M decision. Compacts the ModR/M decision if possible. ModR/M
|
||||
/// decisions are printed as:
|
||||
///
|
||||
/// { /* struct ModRMDecision */
|
||||
/// TYPE,
|
||||
/// modRMTablennnn
|
||||
/// }
|
||||
///
|
||||
/// where nnnn is a unique ID for the corresponding table of IDs.
|
||||
/// TYPE indicates whether the table has one entry that is the same
|
||||
/// regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one
|
||||
/// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.
|
||||
/// nnnn is the number of a table for looking up these values. The tables
|
||||
/// are writen separately so that tables consisting entirely of zeros will
|
||||
/// not be duplicated. (These all have the name modRMEmptyTable.) A table
|
||||
/// is printed as:
|
||||
///
|
||||
/// InstrUID modRMTablennnn[k] = {
|
||||
/// nnnn, /* MNEMONIC */
|
||||
/// ...
|
||||
/// nnnn /* MNEMONIC */
|
||||
/// };
|
||||
///
|
||||
/// @param o1 - The output stream to print the ID table to.
|
||||
/// @param o2 - The output stream to print the decision structure to.
|
||||
/// @param i1 - The indentation level to use with stream o1.
|
||||
/// @param i2 - The indentation level to use with stream o2.
|
||||
/// @param decision - The ModR/M decision to emit. This decision has 256
|
||||
/// entries - emitModRMDecision decides how to compact it.
|
||||
void emitModRMDecision(raw_ostream &o1,
|
||||
raw_ostream &o2,
|
||||
uint32_t &i1,
|
||||
uint32_t &i2,
|
||||
ModRMDecision &decision) const;
|
||||
|
||||
/// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M
|
||||
/// decisions. An OpcodeDecision is printed as:
|
||||
///
|
||||
/// { /* struct OpcodeDecision */
|
||||
/// /* 0x00 */
|
||||
/// { /* struct ModRMDecision */
|
||||
/// ...
|
||||
/// }
|
||||
/// ...
|
||||
/// }
|
||||
///
|
||||
/// where the ModRMDecision structure is printed as described in the
|
||||
/// documentation for emitModRMDecision(). emitOpcodeDecision() passes on a
|
||||
/// stream and indent level for the UID tables generated by
|
||||
/// emitModRMDecision(), but does not use them itself.
|
||||
///
|
||||
/// @param o1 - The output stream to print the ID tables generated by
|
||||
/// emitModRMDecision() to.
|
||||
/// @param o2 - The output stream for the decision structure itself.
|
||||
/// @param i1 - The indent level to use with stream o1.
|
||||
/// @param i2 - The indent level to use with stream o2.
|
||||
/// @param decision - The OpcodeDecision to emit along with its subsidiary
|
||||
/// structures.
|
||||
void emitOpcodeDecision(raw_ostream &o1,
|
||||
raw_ostream &o2,
|
||||
uint32_t &i1,
|
||||
uint32_t &i2,
|
||||
OpcodeDecision &decision) const;
|
||||
|
||||
/// emitContextDecision - Emits a ContextDecision and all its subsidiary
|
||||
/// Opcode and ModRMDecisions. A ContextDecision is printed as:
|
||||
///
|
||||
/// struct ContextDecision NAME = {
|
||||
/// { /* OpcodeDecisions */
|
||||
/// /* IC */
|
||||
/// { /* struct OpcodeDecision */
|
||||
/// ...
|
||||
/// },
|
||||
/// ...
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// NAME is the name of the ContextDecision (typically one of the four names
|
||||
/// ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and THREEBYTE3A_SYM from
|
||||
/// X86DisassemblerDecoderCommon.h).
|
||||
/// IC is one of the contexts in InstructionContext. There is an opcode
|
||||
/// decision for each possible context.
|
||||
/// The OpcodeDecision structures are printed as described in the
|
||||
/// documentation for emitOpcodeDecision.
|
||||
///
|
||||
/// @param o1 - The output stream to print the ID tables generated by
|
||||
/// emitModRMDecision() to.
|
||||
/// @param o2 - The output stream to print the decision structure to.
|
||||
/// @param i1 - The indent level to use with stream o1.
|
||||
/// @param i2 - The indent level to use with stream o2.
|
||||
/// @param decision - The ContextDecision to emit along with its subsidiary
|
||||
/// structures.
|
||||
/// @param name - The name for the ContextDecision.
|
||||
void emitContextDecision(raw_ostream &o1,
|
||||
raw_ostream &o2,
|
||||
uint32_t &i1,
|
||||
uint32_t &i2,
|
||||
ContextDecision &decision,
|
||||
const char* name) const;
|
||||
|
||||
/// emitInstructionInfo - Prints the instruction specifier table, which has
|
||||
/// one entry for each instruction, and contains name and operand
|
||||
/// information. This table is printed as:
|
||||
///
|
||||
/// struct InstructionSpecifier CONTEXTS_SYM[k] = {
|
||||
/// {
|
||||
/// /* nnnn */
|
||||
/// "MNEMONIC",
|
||||
/// 0xnn,
|
||||
/// {
|
||||
/// {
|
||||
/// ENCODING,
|
||||
/// TYPE
|
||||
/// },
|
||||
/// ...
|
||||
/// }
|
||||
/// },
|
||||
/// };
|
||||
///
|
||||
/// k is the total number of instructions.
|
||||
/// nnnn is the ID of the current instruction (0-based). This table
|
||||
/// includes entries for non-instructions like PHINODE.
|
||||
/// 0xnn is the lowest possible opcode for the current instruction, used for
|
||||
/// AddRegFrm instructions to compute the operand's value.
|
||||
/// ENCODING and TYPE describe the encoding and type for a single operand.
|
||||
///
|
||||
/// @param o - The output stream to which the instruction table should be
|
||||
/// written.
|
||||
/// @param i - The indent level for use with the stream.
|
||||
void emitInstructionInfo(raw_ostream &o, uint32_t &i) const;
|
||||
|
||||
/// emitContextTable - Prints the table that is used to translate from an
|
||||
/// instruction attribute mask to an instruction context. This table is
|
||||
/// printed as:
|
||||
///
|
||||
/// InstructionContext CONTEXTS_STR[256] = {
|
||||
/// IC, /* 0x00 */
|
||||
/// ...
|
||||
/// };
|
||||
///
|
||||
/// IC is the context corresponding to the mask 0x00, and there are 256
|
||||
/// possible masks.
|
||||
///
|
||||
/// @param o - The output stream to which the context table should be written.
|
||||
/// @param i - The indent level for use with the stream.
|
||||
void emitContextTable(raw_ostream &o, uint32_t &i) const;
|
||||
|
||||
/// emitContextDecisions - Prints all four ContextDecision structures using
|
||||
/// emitContextDecision().
|
||||
///
|
||||
/// @param o1 - The output stream to print the ID tables generated by
|
||||
/// emitModRMDecision() to.
|
||||
/// @param o2 - The output stream to print the decision structures to.
|
||||
/// @param i1 - The indent level to use with stream o1.
|
||||
/// @param i2 - The indent level to use with stream o2.
|
||||
void emitContextDecisions(raw_ostream &o1,
|
||||
raw_ostream &o2,
|
||||
uint32_t &i1,
|
||||
uint32_t &i2) const;
|
||||
|
||||
/// setTableFields - Uses a ModRMFilter to set the appropriate entries in a
|
||||
/// ModRMDecision to refer to a particular instruction ID.
|
||||
///
|
||||
/// @param decision - The ModRMDecision to populate.
|
||||
/// @param filter - The filter to use in deciding which entries to populate.
|
||||
/// @param uid - The unique ID to set matching entries to.
|
||||
/// @param opcode - The opcode of the instruction, for error reporting.
|
||||
void setTableFields(ModRMDecision &decision,
|
||||
const ModRMFilter &filter,
|
||||
InstrUID uid,
|
||||
uint8_t opcode);
|
||||
public:
|
||||
/// Constructor - Allocates space for the class decisions and clears them.
|
||||
DisassemblerTables();
|
||||
|
||||
~DisassemblerTables();
|
||||
|
||||
/// emit - Emits the instruction table, context table, and class decisions.
|
||||
///
|
||||
/// @param o - The output stream to print the tables to.
|
||||
void emit(raw_ostream &o) const;
|
||||
|
||||
/// setTableFields - Uses the opcode type, instruction context, opcode, and a
|
||||
/// ModRMFilter as criteria to set a particular set of entries in the
|
||||
/// decode tables to point to a specific uid.
|
||||
///
|
||||
/// @param type - The opcode type (ONEBYTE, TWOBYTE, etc.)
|
||||
/// @param insnContext - The context to use (IC, IC_64BIT, etc.)
|
||||
/// @param opcode - The last byte of the opcode (not counting any escape
|
||||
/// or extended opcodes).
|
||||
/// @param filter - The ModRMFilter that decides which ModR/M byte values
|
||||
/// correspond to the desired instruction.
|
||||
/// @param uid - The unique ID of the instruction.
|
||||
void setTableFields(OpcodeType type,
|
||||
InstructionContext insnContext,
|
||||
uint8_t opcode,
|
||||
const ModRMFilter &filter,
|
||||
InstrUID uid);
|
||||
|
||||
/// specForUID - Returns the instruction specifier for a given unique
|
||||
/// instruction ID. Used when resolving collisions.
|
||||
///
|
||||
/// @param uid - The unique ID of the instruction.
|
||||
/// @return - A reference to the instruction specifier.
|
||||
InstructionSpecifier& specForUID(InstrUID uid) {
|
||||
if (uid >= InstructionSpecifiers.size())
|
||||
InstructionSpecifiers.resize(uid + 1);
|
||||
|
||||
return InstructionSpecifiers[uid];
|
||||
}
|
||||
|
||||
// hasConflicts - Reports whether there were primary decode conflicts
|
||||
// from any instructions added to the tables.
|
||||
// @return - true if there were; false otherwise.
|
||||
|
||||
bool hasConflicts() {
|
||||
return HasConflicts;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace X86Disassembler
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
|
@ -0,0 +1,197 @@
|
|||
//===- X86ModRMFilters.h - Disassembler ModR/M filters ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is part of the X86 Disassembler Emitter.
|
||||
// It contains ModR/M filters that determine which values of the ModR/M byte
|
||||
// are valid for a particular instruction.
|
||||
// Documentation for the disassembler emitter in general can be found in
|
||||
// X86DisassemblerEmitter.h.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef X86MODRMFILTERS_H
|
||||
#define X86MODRMFILTERS_H
|
||||
|
||||
#include "llvm/System/DataTypes.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace X86Disassembler {
|
||||
|
||||
/// ModRMFilter - Abstract base class for classes that recognize patterns in
///   ModR/M bytes.
class ModRMFilter {
public:
  /// Destructor - Override as necessary.
  virtual ~ModRMFilter() {}

  /// isDumb - Indicates whether this filter gives the same answer no matter
  ///   what the ModR/M byte is.  The base implementation answers "no".
  ///
  /// @result - True if the filter returns the same value for any ModR/M
  ///           byte; false if not.
  virtual bool isDumb() const {
    return false;
  }

  /// accepts - Indicates whether the filter accepts a particular ModR/M
  ///   byte value.  Every concrete filter must implement this.
  ///
  /// @param modRM - The ModR/M byte value to test.
  /// @result      - True if the filter accepts the ModR/M byte; false if not.
  virtual bool accepts(uint8_t modRM) const = 0;
};
|
||||
|
||||
/// DumbFilter - Accepts any ModR/M byte. Used for instructions that do not
|
||||
/// require a ModR/M byte or instructions where the entire ModR/M byte is used
|
||||
/// for operands.
|
||||
class DumbFilter : public ModRMFilter {
|
||||
public:
|
||||
bool isDumb() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool accepts(uint8_t modRM) const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/// ModFilter - Filters based on the mod bits [bits 7-6] of the ModR/M byte.
|
||||
/// Some instructions are classified based on whether they are 11 or anything
|
||||
/// else. This filter performs that classification.
|
||||
class ModFilter : public ModRMFilter {
|
||||
private:
|
||||
bool R;
|
||||
public:
|
||||
/// Constructor
|
||||
///
|
||||
/// @r - True if the mod bits of the ModR/M byte must be 11; false
|
||||
/// otherwise. The name r derives from the fact that the mod
|
||||
/// bits indicate whether the R/M bits [bits 2-0] signify a
|
||||
/// register or a memory operand.
|
||||
ModFilter(bool r) :
|
||||
ModRMFilter(),
|
||||
R(r) {
|
||||
}
|
||||
|
||||
bool accepts(uint8_t modRM) const {
|
||||
if (R == ((modRM & 0xc0) == 0xc0))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/// EscapeFilter - Filters escape opcodes, which are classified in two ways. If
|
||||
/// the ModR/M byte is between 0xc0 and 0xff, then there is one slot for each
|
||||
/// possible value. Otherwise, there is one instruction for each value of the
|
||||
/// nnn field [bits 5-3], known elsewhere as the reg field.
|
||||
class EscapeFilter : public ModRMFilter {
|
||||
private:
|
||||
bool C0_FF;
|
||||
uint8_t NNN_or_ModRM;
|
||||
public:
|
||||
/// Constructor
|
||||
///
|
||||
/// @c0_ff - True if the ModR/M byte must fall between 0xc0 and 0xff;
|
||||
/// false otherwise.
|
||||
/// @nnn_or_modRM - If c0_ff is true, the required value of the entire ModR/M
|
||||
/// byte. If c0_ff is false, the required value of the nnn
|
||||
/// field.
|
||||
EscapeFilter(bool c0_ff, uint8_t nnn_or_modRM) :
|
||||
ModRMFilter(),
|
||||
C0_FF(c0_ff),
|
||||
NNN_or_ModRM(nnn_or_modRM) {
|
||||
}
|
||||
|
||||
bool accepts(uint8_t modRM) const {
|
||||
if ((C0_FF && modRM >= 0xc0 && (modRM == NNN_or_ModRM)) ||
|
||||
(!C0_FF && modRM < 0xc0 && ((modRM & 0x38) >> 3) == NNN_or_ModRM))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/// AddRegEscapeFilter - Some escape opcodes have one of the register operands
|
||||
/// added to the ModR/M byte, meaning that a range of eight ModR/M values
|
||||
/// maps to a single instruction. Such instructions require the ModR/M byte
|
||||
/// to fall between 0xc0 and 0xff.
|
||||
class AddRegEscapeFilter : public ModRMFilter {
|
||||
private:
|
||||
uint8_t ModRM;
|
||||
public:
|
||||
/// Constructor
|
||||
///
|
||||
/// @modRM - The value of the ModR/M byte when the register operand
|
||||
/// refers to the first register in the register set.
|
||||
AddRegEscapeFilter(uint8_t modRM) : ModRM(modRM) {
|
||||
}
|
||||
|
||||
bool accepts(uint8_t modRM) const {
|
||||
if (modRM >= ModRM && modRM < ModRM + 8)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/// ExtendedFilter - Extended opcodes are classified based on the value of the
|
||||
/// mod field [bits 7-6] and the value of the nnn field [bits 5-3].
|
||||
class ExtendedFilter : public ModRMFilter {
|
||||
private:
|
||||
bool R;
|
||||
uint8_t NNN;
|
||||
public:
|
||||
/// Constructor
|
||||
///
|
||||
/// @r - True if the mod field must be set to 11; false otherwise.
|
||||
/// The name is explained at ModFilter.
|
||||
/// @nnn - The required value of the nnn field.
|
||||
ExtendedFilter(bool r, uint8_t nnn) :
|
||||
ModRMFilter(),
|
||||
R(r),
|
||||
NNN(nnn) {
|
||||
}
|
||||
|
||||
bool accepts(uint8_t modRM) const {
|
||||
if (((R && ((modRM & 0xc0) == 0xc0)) ||
|
||||
(!R && ((modRM & 0xc0) != 0xc0))) &&
|
||||
(((modRM & 0x38) >> 3) == NNN))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/// ExactFilter - The occasional extended opcode (such as VMCALL or MONITOR)
|
||||
/// requires the ModR/M byte to have a specific value.
|
||||
class ExactFilter : public ModRMFilter
|
||||
{
|
||||
private:
|
||||
uint8_t ModRM;
|
||||
public:
|
||||
/// Constructor
|
||||
///
|
||||
/// @modRM - The required value of the full ModR/M byte.
|
||||
ExactFilter(uint8_t modRM) :
|
||||
ModRMFilter(),
|
||||
ModRM(modRM) {
|
||||
}
|
||||
|
||||
bool accepts(uint8_t modRM) const {
|
||||
if (ModRM == modRM)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace X86Disassembler
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
|
@ -0,0 +1,959 @@
|
|||
//===- X86RecognizableInstr.cpp - Disassembler instruction spec --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is part of the X86 Disassembler Emitter.
|
||||
// It contains the implementation of a single recognizable instruction.
|
||||
// Documentation for the disassembler emitter in general can be found in
|
||||
// X86DisassemblerEmitter.h.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86DisassemblerShared.h"
|
||||
#include "X86RecognizableInstr.h"
|
||||
#include "X86ModRMFilters.h"
|
||||
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// A clone of X86 since we can't depend on something that is generated.
namespace X86Local {
  // Values compared against the "FormBits" field of an instruction record.
  enum {
    Pseudo      = 0,
    RawFrm      = 1,
    AddRegFrm   = 2,
    MRMDestReg  = 3,
    MRMDestMem  = 4,
    MRMSrcReg   = 5,
    MRMSrcMem   = 6,
    // MRMnr: eight consecutive values, 16 through 23.
    MRM0r = 16, MRM1r = 17,
    MRM2r = 18, MRM3r = 19,
    MRM4r = 20, MRM5r = 21,
    MRM6r = 22, MRM7r = 23,
    // MRMnm: eight consecutive values, 24 through 31.
    MRM0m = 24, MRM1m = 25,
    MRM2m = 26, MRM3m = 27,
    MRM4m = 28, MRM5m = 29,
    MRM6m = 30, MRM7m = 31,
    MRMInitReg  = 32
  };

  // Values compared against the "Prefix" field of an instruction record.
  enum {
    TB  = 1,
    REP = 2,
    // D8 through DF: eight consecutive values, 3 through 10.
    D8 = 3,  D9 = 4,
    DA = 5,  DB = 6,
    DC = 7,  DD = 8,
    DE = 9,  DF = 10,
    XD  = 11,
    XS  = 12,
    T8  = 13,
    TA  = 14
  };
}
|
||||
|
||||
// X-macro lists of opcodes.  Each list expands to one EXTENSION_TABLE(n)
// invocation per opcode, where n is the opcode written as two hex digits
// without a 0x prefix; the user defines EXTENSION_TABLE before expanding a
// list and undefines it afterwards.

// One-byte opcodes.
#define ONE_BYTE_EXTENSION_TABLES \
  EXTENSION_TABLE(80)             \
  EXTENSION_TABLE(81)             \
  EXTENSION_TABLE(82)             \
  EXTENSION_TABLE(83)             \
  EXTENSION_TABLE(8f)             \
  EXTENSION_TABLE(c0)             \
  EXTENSION_TABLE(c1)             \
  EXTENSION_TABLE(c6)             \
  EXTENSION_TABLE(c7)             \
  EXTENSION_TABLE(d0)             \
  EXTENSION_TABLE(d1)             \
  EXTENSION_TABLE(d2)             \
  EXTENSION_TABLE(d3)             \
  EXTENSION_TABLE(f6)             \
  EXTENSION_TABLE(f7)             \
  EXTENSION_TABLE(fe)             \
  EXTENSION_TABLE(ff)

// Two-byte opcodes.
#define TWO_BYTE_EXTENSION_TABLES \
  EXTENSION_TABLE(00)             \
  EXTENSION_TABLE(01)             \
  EXTENSION_TABLE(18)             \
  EXTENSION_TABLE(71)             \
  EXTENSION_TABLE(72)             \
  EXTENSION_TABLE(73)             \
  EXTENSION_TABLE(ae)             \
  EXTENSION_TABLE(b9)             \
  EXTENSION_TABLE(ba)             \
  EXTENSION_TABLE(c7)

// Two-byte opcodes, "full" list.
#define TWO_BYTE_FULL_EXTENSION_TABLES \
  EXTENSION_TABLE(01)
|
||||
|
||||
|
||||
using namespace X86Disassembler;
|
||||
|
||||
/// needsModRMForDecode - Indicates whether a particular instruction requires a
|
||||
/// ModR/M byte for the instruction to be properly decoded. For example, a
|
||||
/// MRMDestReg instruction needs the Mod field in the ModR/M byte to be set to
|
||||
/// 0b11.
|
||||
///
|
||||
/// @param form - The form of the instruction.
|
||||
/// @return - true if the form implies that a ModR/M byte is required, false
|
||||
/// otherwise.
|
||||
static bool needsModRMForDecode(uint8_t form) {
|
||||
if (form == X86Local::MRMDestReg ||
|
||||
form == X86Local::MRMDestMem ||
|
||||
form == X86Local::MRMSrcReg ||
|
||||
form == X86Local::MRMSrcMem ||
|
||||
(form >= X86Local::MRM0r && form <= X86Local::MRM7r) ||
|
||||
(form >= X86Local::MRM0m && form <= X86Local::MRM7m))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isRegFormat - Indicates whether a particular form requires the Mod field of
|
||||
/// the ModR/M byte to be 0b11.
|
||||
///
|
||||
/// @param form - The form of the instruction.
|
||||
/// @return - true if the form implies that Mod must be 0b11, false
|
||||
/// otherwise.
|
||||
static bool isRegFormat(uint8_t form) {
|
||||
if (form == X86Local::MRMDestReg ||
|
||||
form == X86Local::MRMSrcReg ||
|
||||
(form >= X86Local::MRM0r && form <= X86Local::MRM7r))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/// byteFromBitsInit - Extracts a value at most 8 bits in width from a BitsInit.
|
||||
/// Useful for switch statements and the like.
|
||||
///
|
||||
/// @param init - A reference to the BitsInit to be decoded.
|
||||
/// @return - The field, with the first bit in the BitsInit as the lowest
|
||||
/// order bit.
|
||||
static uint8_t byteFromBitsInit(BitsInit &init) {
|
||||
int width = init.getNumBits();
|
||||
|
||||
assert(width <= 8 && "Field is too large for uint8_t!");
|
||||
|
||||
int index;
|
||||
uint8_t mask = 0x01;
|
||||
|
||||
uint8_t ret = 0;
|
||||
|
||||
for (index = 0; index < width; index++) {
|
||||
if (static_cast<BitInit*>(init.getBit(index))->getValue())
|
||||
ret |= mask;
|
||||
|
||||
mask <<= 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// byteFromRec - Extract a value at most 8 bits in with from a Record given the
|
||||
/// name of the field.
|
||||
///
|
||||
/// @param rec - The record from which to extract the value.
|
||||
/// @param name - The name of the field in the record.
|
||||
/// @return - The field, as translated by byteFromBitsInit().
|
||||
static uint8_t byteFromRec(const Record* rec, const std::string &name) {
|
||||
BitsInit* bits = rec->getValueAsBitsInit(name);
|
||||
return byteFromBitsInit(*bits);
|
||||
}
|
||||
|
||||
/// Constructor - Captures everything the emitter needs to know about one
///   instruction record.  Records that are not subclasses of X86Inst are
///   marked as not to be emitted and none of their encoding fields are read.
///
/// @param tables - The tables that own the instruction specifier for uid.
/// @param insn   - The CodeGenInstruction to examine.
/// @param uid    - The unique ID of the instruction.
RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
                                     const CodeGenInstruction &insn,
                                     InstrUID uid) {
  UID = uid;

  Rec = insn.TheDef;
  Name = Rec->getName();
  Spec = &tables.specForUID(UID);

  // Give every remaining member a defined value before the early return
  // below, so that an instance built from a non-X86Inst record never carries
  // indeterminate fields.
  Prefix = 0;
  Opcode = 0;
  Form = 0;
  SegOvr = 0;
  HasOpSizePrefix = false;
  HasREX_WPrefix = false;
  HasLockPrefix = false;
  IsCodeGenOnly = false;
  Operands = NULL;
  IsSSE = false;
  HasFROperands = false;
  ShouldBeEmitted = false;

  if (!Rec->isSubClassOf("X86Inst"))
    return;

  Prefix = byteFromRec(Rec, "Prefix");
  Opcode = byteFromRec(Rec, "Opcode");
  Form   = byteFromRec(Rec, "FormBits");
  SegOvr = byteFromRec(Rec, "SegOvrBits");

  HasOpSizePrefix = Rec->getValueAsBit("hasOpSizePrefix");
  HasREX_WPrefix  = Rec->getValueAsBit("hasREX_WPrefix");
  HasLockPrefix   = Rec->getValueAsBit("hasLockPrefix");
  IsCodeGenOnly   = Rec->getValueAsBit("isCodeGenOnly");

  AsmString = Rec->getValueAsString("AsmString");

  Operands = &insn.OperandList;

  // An operand-size prefix on an instruction whose name does not contain "16"
  // is taken to mark an SSE instruction.
  IsSSE = HasOpSizePrefix && (Name.find("16") == Name.npos);

  ShouldBeEmitted = true;
}
|
||||
|
||||
/// processInstr - Builds a RecognizableInstr for one instruction, always emits
///   its instruction specifier, and emits its decode path only if the
///   instruction is still marked as one that should be emitted.
///
/// @param tables - The tables to populate.
/// @param insn   - The CodeGenInstruction to process.
/// @param uid    - The unique ID of the instruction.
void RecognizableInstr::processInstr(DisassemblerTables &tables,
                                     const CodeGenInstruction &insn,
                                     InstrUID uid) {
  RecognizableInstr instr(tables, insn, uid);

  // Emitting the specifier may clear the should-be-emitted flag, so it has to
  // run before that flag is consulted.
  instr.emitInstructionSpecifier(tables);

  if (!instr.shouldBeEmitted())
    return;

  instr.emitDecodePath(tables);
}
|
||||
|
||||
/// insnContext - Derives the instruction context from the instruction's name,
///   its REX.W and operand-size prefix flags, and its Prefix field.
///
/// @return - The context this instruction belongs to.
InstructionContext RecognizableInstr::insnContext() const {
  const bool is64Bit = Name.find("64") != Name.npos || HasREX_WPrefix;

  if (!is64Bit) {
    if (HasOpSizePrefix)
      return IC_OPSIZE;
    if (Prefix == X86Local::XD)
      return IC_XD;
    if (Prefix == X86Local::XS)
      return IC_XS;
    return IC;
  }

  // From here on the instruction is 64-bit.  The operand-size prefix takes
  // precedence over the XS/XD prefixes.
  if (HasOpSizePrefix)
    return HasREX_WPrefix ? IC_64BIT_REXW_OPSIZE : IC_64BIT_OPSIZE;

  if (HasREX_WPrefix) {
    if (Prefix == X86Local::XS)
      return IC_64BIT_REXW_XS;
    if (Prefix == X86Local::XD)
      return IC_64BIT_REXW_XD;
    return IC_64BIT_REXW;
  }

  if (Prefix == X86Local::XD)
    return IC_64BIT_XD;
  if (Prefix == X86Local::XS)
    return IC_64BIT_XS;

  return IC_64BIT;
}
|
||||
|
||||
/// filter - Decides how an instruction should be treated by the emitter.  The
///   checks run in a fixed order and the first one that matches determines the
///   result, so a strong filter listed after a weak one only applies when the
///   weak one did not match.
///
/// @return - FILTER_STRONG, FILTER_WEAK, or FILTER_NORMAL.
RecognizableInstr::filter_ret RecognizableInstr::filter() const {
  // Substrings that mark an instruction as artificial.
  static const char *const artificialMarkers[] = {
    "TAILJMP", "_Int", "_int", "Int_", "_NOREX",
    "EH_RETURN", "V_SET", "LOCK_", "WIN"
  };

  // Exact names that are special-cased as weakly filtered.
  static const char *const weakNames[] = {
    "MOVLPDrr",    "MOVLPSrr",    "PUSHFQ",
    "BSF16rr",     "BSF16rm",     "BSR16rr",     "BSR16rm",
    "MOVSX16rm8",  "MOVSX16rr8",  "MOVZX16rm8",  "MOVZX16rr8",
    "PUSH32i16",   "PUSH64i16",   "MOVPQI2QImr",
    "MOVSDmr",     "MOVSDrm",     "MOVSSmr",     "MOVSSrm",
    "MMX_MOVD64rrv164", "CRC32m16", "MOV64ri64i32", "CRC32r16"
  };

  const unsigned numArtificialMarkers =
    sizeof(artificialMarkers) / sizeof(artificialMarkers[0]);
  const unsigned numWeakNames = sizeof(weakNames) / sizeof(weakNames[0]);

  // Filter out intrinsics
  if (!Rec->isSubClassOf("X86Inst"))
    return FILTER_STRONG;

  if (IsCodeGenOnly || Form == X86Local::Pseudo)
    return FILTER_STRONG;

  // Filter out instructions with a LOCK prefix;
  //   prefer forms that do not have the prefix
  if (HasLockPrefix)
    return FILTER_WEAK;

  // Filter out artificial instructions
  for (unsigned marker = 0; marker < numArtificialMarkers; ++marker) {
    if (Name.find(artificialMarkers[marker]) != Name.npos)
      return FILTER_STRONG;
  }

  // Special cases.
  const bool mentionsMOV = Name.find("MOV") != Name.npos;

  if (Name != "PCMPISTRI" && Name.find("PCMPISTRI") != Name.npos)
    return FILTER_WEAK;
  if (Name != "PCMPESTRI" && Name.find("PCMPESTRI") != Name.npos)
    return FILTER_WEAK;

  if (mentionsMOV && Name.find("r0") != Name.npos)
    return FILTER_WEAK;
  if (Name.find("MOVZ") != Name.npos && Name.find("MOVZX") == Name.npos)
    return FILTER_WEAK;
  if (Name.find("Fs") != Name.npos)
    return FILTER_WEAK;

  for (unsigned weak = 0; weak < numWeakNames; ++weak) {
    if (Name == weakNames[weak])
      return FILTER_WEAK;
  }

  // Filter out instructions with segment override prefixes.
  // They're too messy to handle now and we'll special case them if needed.
  if (SegOvr)
    return FILTER_STRONG;

  // Filter out instructions that can't be printed.
  if (AsmString.size() == 0)
    return FILTER_STRONG;

  // Filter out instructions with subreg operands.
  if (AsmString.find("subreg") != AsmString.npos)
    return FILTER_STRONG;

  assert(Form != X86Local::MRMInitReg &&
         "FORMAT_MRMINITREG instruction not skipped");

  // MOV variants with FR operands whose name contains "to", or contains "2"
  // without containing "32".
  const bool hasLoneTwo = Name.find("2") != Name.npos &&
                          Name.find("32") == Name.npos;
  const bool hasTo = Name.find("to") != Name.npos;

  if (HasFROperands && mentionsMOV && (hasLoneTwo || hasTo))
    return FILTER_WEAK;

  return FILTER_NORMAL;
}
|
||||
|
||||
void RecognizableInstr::handleOperand(
|
||||
bool optional,
|
||||
unsigned &operandIndex,
|
||||
unsigned &physicalOperandIndex,
|
||||
unsigned &numPhysicalOperands,
|
||||
unsigned *operandMapping,
|
||||
OperandEncoding (*encodingFromString)(const std::string&, bool hasOpSizePrefix)) {
|
||||
if (optional) {
|
||||
if (physicalOperandIndex >= numPhysicalOperands)
|
||||
return;
|
||||
} else {
|
||||
assert(physicalOperandIndex < numPhysicalOperands);
|
||||
}
|
||||
|
||||
while (operandMapping[operandIndex] != operandIndex) {
|
||||
Spec->operands[operandIndex].encoding = ENCODING_DUP;
|
||||
Spec->operands[operandIndex].type =
|
||||
(OperandType)(TYPE_DUP0 + operandMapping[operandIndex]);
|
||||
++operandIndex;
|
||||
}
|
||||
|
||||
const std::string &typeName = (*Operands)[operandIndex].Rec->getName();
|
||||
|
||||
Spec->operands[operandIndex].encoding = encodingFromString(typeName,
|
||||
HasOpSizePrefix);
|
||||
Spec->operands[operandIndex].type = typeFromString(typeName,
|
||||
IsSSE,
|
||||
HasREX_WPrefix,
|
||||
HasOpSizePrefix);
|
||||
|
||||
++operandIndex;
|
||||
++physicalOperandIndex;
|
||||
}
|
||||
|
||||
/// emitInstructionSpecifier - Fills in the instruction specifier for this
///   instruction: its name, whether it is weakly filtered, its context, and
///   the encoding and type of each operand as dictated by the instruction's
///   form.  Clears ShouldBeEmitted when the instruction is strongly filtered
///   or is one of the FR-operand MOV variants excluded below.
///
/// @param tables - The tables being populated (not used directly here).
void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
  Spec->name = Name;

  if (!Rec->isSubClassOf("X86Inst"))
    return;

  switch (filter()) {
  case FILTER_WEAK:
    Spec->filtered = true;
    break;
  case FILTER_STRONG:
    ShouldBeEmitted = false;
    return;
  case FILTER_NORMAL:
    break;
  }

  Spec->insnContext = insnContext();

  const std::vector<CodeGenInstruction::OperandInfo> &OperandList = *Operands;

  unsigned operandIndex;
  unsigned numOperands = OperandList.size();
  unsigned numPhysicalOperands = 0;

  // operandMapping maps from operands in OperandList to their originals.
  // If operandMapping[i] != i, then the entry is a duplicate.
  unsigned operandMapping[X86_MAX_OPERANDS];

  bool hasFROperands = false;

  assert(numOperands < X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");

  for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
    if (OperandList[operandIndex].Constraints.size()) {
      const std::string &constraint = OperandList[operandIndex].Constraints[0];
      const std::string::size_type tiedToPos =
        constraint.find(" << 16) | (1 << TOI::TIED_TO))");

      if (tiedToPos != constraint.npos) {
        // The index of the operand this one is tied to is the single
        // character just before the matched text.  Check that it exists and
        // is a decimal digit before turning it into an index.
        assert(tiedToPos > 0 &&
               "TIED_TO constraint is missing its operand index");

        const char tiedToDigit = constraint[tiedToPos - 1];

        assert(tiedToDigit >= '0' && tiedToDigit <= '9' &&
               "TIED_TO operand index is not a decimal digit");

        operandMapping[operandIndex] = tiedToDigit - '0';
      } else {
        ++numPhysicalOperands;
        operandMapping[operandIndex] = operandIndex;
      }
    } else {
      ++numPhysicalOperands;
      operandMapping[operandIndex] = operandIndex;
    }

    const std::string &recName = OperandList[operandIndex].Rec->getName();

    if (recName.find("FR") != recName.npos)
      hasFROperands = true;
  }

  // MOV variants with FR operands whose name contains "to", or contains "2"
  // without containing "32", are not emitted.
  if (hasFROperands && Name.find("MOV") != Name.npos &&
     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
      (Name.find("to") != Name.npos)))
    ShouldBeEmitted = false;

  if (!ShouldBeEmitted)
    return;

#define HANDLE_OPERAND(class)               \
  handleOperand(false,                      \
                operandIndex,               \
                physicalOperandIndex,       \
                numPhysicalOperands,        \
                operandMapping,             \
                class##EncodingFromString);

#define HANDLE_OPTIONAL(class)              \
  handleOperand(true,                       \
                operandIndex,               \
                physicalOperandIndex,       \
                numPhysicalOperands,        \
                operandMapping,             \
                class##EncodingFromString);

  // operandIndex should always be < numOperands
  operandIndex = 0;
  // physicalOperandIndex should always be < numPhysicalOperands
  unsigned physicalOperandIndex = 0;

  switch (Form) {
  case X86Local::RawFrm:
    // Operand 1 (optional) is an address or immediate.
    // Operand 2 (optional) is an immediate.
    assert(numPhysicalOperands <= 2 &&
           "Unexpected number of operands for RawFrm");
    HANDLE_OPTIONAL(relocation)
    HANDLE_OPTIONAL(immediate)
    break;
  case X86Local::AddRegFrm:
    // Operand 1 is added to the opcode.
    // Operand 2 (optional) is an address.
    assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
           "Unexpected number of operands for AddRegFrm");
    HANDLE_OPERAND(opcodeModifier)
    HANDLE_OPTIONAL(relocation)
    break;
  case X86Local::MRMDestReg:
    // Operand 1 is a register operand in the R/M field.
    // Operand 2 is a register operand in the Reg/Opcode field.
    // Operand 3 (optional) is an immediate.
    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
           "Unexpected number of operands for MRMDestRegFrm");
    HANDLE_OPERAND(rmRegister)
    HANDLE_OPERAND(roRegister)
    HANDLE_OPTIONAL(immediate)
    break;
  case X86Local::MRMDestMem:
    // Operand 1 is a memory operand (possibly SIB-extended)
    // Operand 2 is a register operand in the Reg/Opcode field.
    // Operand 3 (optional) is an immediate.
    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
           "Unexpected number of operands for MRMDestMemFrm");
    HANDLE_OPERAND(memory)
    HANDLE_OPERAND(roRegister)
    HANDLE_OPTIONAL(immediate)
    break;
  case X86Local::MRMSrcReg:
    // Operand 1 is a register operand in the Reg/Opcode field.
    // Operand 2 is a register operand in the R/M field.
    // Operand 3 (optional) is an immediate.
    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
           "Unexpected number of operands for MRMSrcRegFrm");
    HANDLE_OPERAND(roRegister)
    HANDLE_OPERAND(rmRegister)
    HANDLE_OPTIONAL(immediate)
    break;
  case X86Local::MRMSrcMem:
    // Operand 1 is a register operand in the Reg/Opcode field.
    // Operand 2 is a memory operand (possibly SIB-extended)
    // Operand 3 (optional) is an immediate.
    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
           "Unexpected number of operands for MRMSrcMemFrm");
    HANDLE_OPERAND(roRegister)
    HANDLE_OPERAND(memory)
    HANDLE_OPTIONAL(immediate)
    break;
  case X86Local::MRM0r:
  case X86Local::MRM1r:
  case X86Local::MRM2r:
  case X86Local::MRM3r:
  case X86Local::MRM4r:
  case X86Local::MRM5r:
  case X86Local::MRM6r:
  case X86Local::MRM7r:
    // Operand 1 is a register operand in the R/M field.
    // Operand 2 (optional) is an immediate or relocation.
    assert(numPhysicalOperands <= 2 &&
           "Unexpected number of operands for MRMnRFrm");
    HANDLE_OPTIONAL(rmRegister)
    HANDLE_OPTIONAL(relocation)
    break;
  case X86Local::MRM0m:
  case X86Local::MRM1m:
  case X86Local::MRM2m:
  case X86Local::MRM3m:
  case X86Local::MRM4m:
  case X86Local::MRM5m:
  case X86Local::MRM6m:
  case X86Local::MRM7m:
    // Operand 1 is a memory operand (possibly SIB-extended)
    // Operand 2 (optional) is an immediate or relocation.
    assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
           "Unexpected number of operands for MRMnMFrm");
    HANDLE_OPERAND(memory)
    HANDLE_OPTIONAL(relocation)
    break;
  case X86Local::MRMInitReg:
    // Ignored.
    break;
  }

#undef HANDLE_OPERAND
#undef HANDLE_OPTIONAL
}
|
||||
|
||||
void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
|
||||
// Special cases where the LLVM tables are not complete
|
||||
|
||||
#define EXACTCASE(class, name, lastbyte) \
|
||||
if (Name == name) { \
|
||||
tables.setTableFields(class, \
|
||||
insnContext(), \
|
||||
Opcode, \
|
||||
ExactFilter(lastbyte), \
|
||||
UID); \
|
||||
Spec->modifierBase = Opcode; \
|
||||
return; \
|
||||
}
|
||||
|
||||
EXACTCASE(TWOBYTE, "MONITOR", 0xc8)
|
||||
EXACTCASE(TWOBYTE, "MWAIT", 0xc9)
|
||||
EXACTCASE(TWOBYTE, "SWPGS", 0xf8)
|
||||
EXACTCASE(TWOBYTE, "INVEPT", 0x80)
|
||||
EXACTCASE(TWOBYTE, "INVVPID", 0x81)
|
||||
EXACTCASE(TWOBYTE, "VMCALL", 0xc1)
|
||||
EXACTCASE(TWOBYTE, "VMLAUNCH", 0xc2)
|
||||
EXACTCASE(TWOBYTE, "VMRESUME", 0xc3)
|
||||
EXACTCASE(TWOBYTE, "VMXOFF", 0xc4)
|
||||
|
||||
if (Name == "INVLPG") {
|
||||
tables.setTableFields(TWOBYTE,
|
||||
insnContext(),
|
||||
Opcode,
|
||||
ExtendedFilter(false, 7),
|
||||
UID);
|
||||
Spec->modifierBase = Opcode;
|
||||
return;
|
||||
}
|
||||
|
||||
OpcodeType opcodeType = (OpcodeType)-1;
|
||||
|
||||
ModRMFilter* filter = NULL;
|
||||
uint8_t opcodeToSet = 0;
|
||||
|
||||
switch (Prefix) {
|
||||
// Extended two-byte opcodes can start with f2 0f, f3 0f, or 0f
|
||||
case X86Local::XD:
|
||||
case X86Local::XS:
|
||||
case X86Local::TB:
|
||||
opcodeType = TWOBYTE;
|
||||
|
||||
switch (Opcode) {
|
||||
#define EXTENSION_TABLE(n) case 0x##n:
|
||||
TWO_BYTE_EXTENSION_TABLES
|
||||
#undef EXTENSION_TABLE
|
||||
switch (Form) {
|
||||
default:
|
||||
llvm_unreachable("Unhandled two-byte extended opcode");
|
||||
case X86Local::MRM0r:
|
||||
case X86Local::MRM1r:
|
||||
case X86Local::MRM2r:
|
||||
case X86Local::MRM3r:
|
||||
case X86Local::MRM4r:
|
||||
case X86Local::MRM5r:
|
||||
case X86Local::MRM6r:
|
||||
case X86Local::MRM7r:
|
||||
filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
|
||||
break;
|
||||
case X86Local::MRM0m:
|
||||
case X86Local::MRM1m:
|
||||
case X86Local::MRM2m:
|
||||
case X86Local::MRM3m:
|
||||
case X86Local::MRM4m:
|
||||
case X86Local::MRM5m:
|
||||
case X86Local::MRM6m:
|
||||
case X86Local::MRM7m:
|
||||
filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
|
||||
break;
|
||||
} // switch (Form)
|
||||
break;
|
||||
default:
|
||||
if (needsModRMForDecode(Form))
|
||||
filter = new ModFilter(isRegFormat(Form));
|
||||
else
|
||||
filter = new DumbFilter();
|
||||
|
||||
break;
|
||||
} // switch (opcode)
|
||||
opcodeToSet = Opcode;
|
||||
break;
|
||||
case X86Local::T8:
|
||||
opcodeType = THREEBYTE_38;
|
||||
if (needsModRMForDecode(Form))
|
||||
filter = new ModFilter(isRegFormat(Form));
|
||||
else
|
||||
filter = new DumbFilter();
|
||||
opcodeToSet = Opcode;
|
||||
break;
|
||||
case X86Local::TA:
|
||||
opcodeType = THREEBYTE_3A;
|
||||
if (needsModRMForDecode(Form))
|
||||
filter = new ModFilter(isRegFormat(Form));
|
||||
else
|
||||
filter = new DumbFilter();
|
||||
opcodeToSet = Opcode;
|
||||
break;
|
||||
case X86Local::D8:
|
||||
case X86Local::D9:
|
||||
case X86Local::DA:
|
||||
case X86Local::DB:
|
||||
case X86Local::DC:
|
||||
case X86Local::DD:
|
||||
case X86Local::DE:
|
||||
case X86Local::DF:
|
||||
assert(Opcode >= 0xc0 && "Unexpected opcode for an escape opcode");
|
||||
opcodeType = ONEBYTE;
|
||||
if (Form == X86Local::AddRegFrm) {
|
||||
Spec->modifierType = MODIFIER_MODRM;
|
||||
Spec->modifierBase = Opcode;
|
||||
filter = new AddRegEscapeFilter(Opcode);
|
||||
} else {
|
||||
filter = new EscapeFilter(true, Opcode);
|
||||
}
|
||||
opcodeToSet = 0xd8 + (Prefix - X86Local::D8);
|
||||
break;
|
||||
default:
|
||||
opcodeType = ONEBYTE;
|
||||
switch (Opcode) {
|
||||
#define EXTENSION_TABLE(n) case 0x##n:
|
||||
ONE_BYTE_EXTENSION_TABLES
|
||||
#undef EXTENSION_TABLE
|
||||
switch (Form) {
|
||||
default:
|
||||
llvm_unreachable("Fell through the cracks of a single-byte "
|
||||
"extended opcode");
|
||||
case X86Local::MRM0r:
|
||||
case X86Local::MRM1r:
|
||||
case X86Local::MRM2r:
|
||||
case X86Local::MRM3r:
|
||||
case X86Local::MRM4r:
|
||||
case X86Local::MRM5r:
|
||||
case X86Local::MRM6r:
|
||||
case X86Local::MRM7r:
|
||||
filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
|
||||
break;
|
||||
case X86Local::MRM0m:
|
||||
case X86Local::MRM1m:
|
||||
case X86Local::MRM2m:
|
||||
case X86Local::MRM3m:
|
||||
case X86Local::MRM4m:
|
||||
case X86Local::MRM5m:
|
||||
case X86Local::MRM6m:
|
||||
case X86Local::MRM7m:
|
||||
filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
|
||||
break;
|
||||
} // switch (Form)
|
||||
break;
|
||||
case 0xd8:
|
||||
case 0xd9:
|
||||
case 0xda:
|
||||
case 0xdb:
|
||||
case 0xdc:
|
||||
case 0xdd:
|
||||
case 0xde:
|
||||
case 0xdf:
|
||||
filter = new EscapeFilter(false, Form - X86Local::MRM0m);
|
||||
break;
|
||||
default:
|
||||
if (needsModRMForDecode(Form))
|
||||
filter = new ModFilter(isRegFormat(Form));
|
||||
else
|
||||
filter = new DumbFilter();
|
||||
break;
|
||||
} // switch (Opcode)
|
||||
opcodeToSet = Opcode;
|
||||
} // switch (Prefix)
|
||||
|
||||
assert(opcodeType != (OpcodeType)-1 &&
|
||||
"Opcode type not set");
|
||||
assert(filter && "Filter not set");
|
||||
|
||||
if (Form == X86Local::AddRegFrm) {
|
||||
if(Spec->modifierType != MODIFIER_MODRM) {
|
||||
assert(opcodeToSet < 0xf9 &&
|
||||
"Not enough room for all ADDREG_FRM operands");
|
||||
|
||||
uint8_t currentOpcode;
|
||||
|
||||
for (currentOpcode = opcodeToSet;
|
||||
currentOpcode < opcodeToSet + 8;
|
||||
++currentOpcode)
|
||||
tables.setTableFields(opcodeType,
|
||||
insnContext(),
|
||||
currentOpcode,
|
||||
*filter,
|
||||
UID);
|
||||
|
||||
Spec->modifierType = MODIFIER_OPCODE;
|
||||
Spec->modifierBase = opcodeToSet;
|
||||
} else {
|
||||
// modifierBase was set where MODIFIER_MODRM was set
|
||||
tables.setTableFields(opcodeType,
|
||||
insnContext(),
|
||||
opcodeToSet,
|
||||
*filter,
|
||||
UID);
|
||||
}
|
||||
} else {
|
||||
tables.setTableFields(opcodeType,
|
||||
insnContext(),
|
||||
opcodeToSet,
|
||||
*filter,
|
||||
UID);
|
||||
|
||||
Spec->modifierType = MODIFIER_NONE;
|
||||
Spec->modifierBase = opcodeToSet;
|
||||
}
|
||||
|
||||
delete filter;
|
||||
}
|
||||
|
||||
#define TYPE(str, type) if (s == str) return type;
|
||||
/// typeFromString - Maps an operand type name taken from the LLVM instruction
/// tables onto the OperandType recorded in the operand specifier.
///
/// Prefix-dependent special cases are consulted first, in the order SSE,
/// REX.W, missing OpSize; anything they do not claim falls through to the
/// general lookup table.  An unrecognized name is reported on errs() and is
/// treated as unreachable.
OperandType RecognizableInstr::typeFromString(const std::string &s,
                                              bool isSSE,
                                              bool hasREX_WPrefix,
                                              bool hasOpSizePrefix) {
  if (isSSE) {
    // SSE instructions ignore the OpSize prefix, so general-purpose register
    // operands are pinned to the size they were declared with.
    if (s == "GR16") return TYPE_R16;
    if (s == "GR32") return TYPE_R32;
    if (s == "GR64") return TYPE_R64;
  }

  // With a REX_W prefix, a declared 32-bit register is special and stays
  // 32-bit.
  if (hasREX_WPrefix && s == "GR32")
    return TYPE_R32;

  if (!hasOpSizePrefix) {
    // Without an OpSize prefix, a declared 16-bit register or immediate is
    // special and stays 16-bit.
    if (s == "GR16")   return TYPE_R16;
    if (s == "i16imm") return TYPE_IMM16;
  }

  // General mapping; the first matching entry wins.
  struct TypeEntry {
    const char *Name;
    OperandType Type;
  };
  static const TypeEntry GeneralTypes[] = {
    { "i16mem",          TYPE_Mv         },
    { "i16imm",          TYPE_IMMv       },
    { "i16i8imm",        TYPE_IMMv       },
    { "GR16",            TYPE_Rv         },
    { "i32mem",          TYPE_Mv         },
    { "i32imm",          TYPE_IMMv       },
    { "i32i8imm",        TYPE_IMM32      },
    { "GR32",            TYPE_Rv         },
    { "i64mem",          TYPE_Mv         },
    { "i64i32imm",       TYPE_IMM64      },
    { "i64i8imm",        TYPE_IMM64      },
    { "GR64",            TYPE_R64        },
    { "i8mem",           TYPE_M8         },
    { "i8imm",           TYPE_IMM8       },
    { "GR8",             TYPE_R8         },
    { "VR128",           TYPE_XMM128     },
    { "f128mem",         TYPE_M128       },
    { "FR64",            TYPE_XMM64      },
    { "f64mem",          TYPE_M64FP      },
    { "FR32",            TYPE_XMM32      },
    { "f32mem",          TYPE_M32FP      },
    { "RST",             TYPE_ST         },
    { "i128mem",         TYPE_M128       },
    { "i64i32imm_pcrel", TYPE_REL64      },
    { "i32imm_pcrel",    TYPE_REL32      },
    { "SSECC",           TYPE_IMM8       },
    { "brtarget",        TYPE_RELv       },
    { "brtarget8",       TYPE_REL8       },
    { "f80mem",          TYPE_M80FP      },
    { "lea32mem",        TYPE_M32        },
    { "lea64_32mem",     TYPE_M64        },
    { "lea64mem",        TYPE_M64        },
    { "VR64",            TYPE_MM64       },
    { "i64imm",          TYPE_IMMv       },
    { "opaque32mem",     TYPE_M1616      },
    { "opaque48mem",     TYPE_M1632      },
    { "opaque80mem",     TYPE_M1664      },
    { "opaque512mem",    TYPE_M512       },
    { "SEGMENT_REG",     TYPE_SEGMENTREG },
    { "DEBUG_REG",       TYPE_DEBUGREG   },
    { "CONTROL_REG_32",  TYPE_CR32       },
    { "CONTROL_REG_64",  TYPE_CR64       },
    { "offset8",         TYPE_MOFFS8     },
    { "offset16",        TYPE_MOFFS16    },
    { "offset32",        TYPE_MOFFS32    },
    { "offset64",        TYPE_MOFFS64    }
  };

  const TypeEntry *const End =
    GeneralTypes + sizeof(GeneralTypes) / sizeof(GeneralTypes[0]);
  for (const TypeEntry *Entry = GeneralTypes; Entry != End; ++Entry)
    if (s == Entry->Name)
      return Entry->Type;

  errs() << "Unhandled type string " << s << "\n";
  llvm_unreachable("Unhandled type string");
}
|
||||
#undef TYPE
|
||||
|
||||
#define ENCODING(str, encoding) if (s == str) return encoding;
|
||||
/// immediateEncodingFromString - Maps the type name of an immediate operand
/// onto the OperandEncoding stored in the operand specifier.  An unrecognized
/// name is reported on errs() and is treated as unreachable.
OperandEncoding RecognizableInstr::immediateEncodingFromString
  (const std::string &s,
   bool hasOpSizePrefix) {
  // Without an OpSize prefix, a declared 16-bit immediate is special: it is
  // encoded as a fixed 16-bit word.
  if (!hasOpSizePrefix && s == "i16imm")
    return ENCODING_IW;

  // General mapping; the first matching entry wins.
  struct EncodingEntry {
    const char *Name;
    OperandEncoding Encoding;
  };
  static const EncodingEntry Immediates[] = {
    { "i32i8imm",  ENCODING_IB },
    { "SSECC",     ENCODING_IB },
    { "i16imm",    ENCODING_Iv },
    { "i16i8imm",  ENCODING_IB },
    { "i32imm",    ENCODING_Iv },
    { "i64i32imm", ENCODING_ID },
    { "i64i8imm",  ENCODING_IB },
    { "i8imm",     ENCODING_IB }
  };

  const EncodingEntry *const End =
    Immediates + sizeof(Immediates) / sizeof(Immediates[0]);
  for (const EncodingEntry *Entry = Immediates; Entry != End; ++Entry)
    if (s == Entry->Name)
      return Entry->Encoding;

  errs() << "Unhandled immediate encoding " << s << "\n";
  llvm_unreachable("Unhandled immediate encoding");
}
|
||||
|
||||
/// rmRegisterEncodingFromString - Maps the register class name of an R/M
/// register operand onto its OperandEncoding.  Every recognized register
/// class yields ENCODING_RM; hasOpSizePrefix is not consulted.  An
/// unrecognized name is reported on errs() and is treated as unreachable.
OperandEncoding RecognizableInstr::rmRegisterEncodingFromString
  (const std::string &s,
   bool hasOpSizePrefix) {
  static const char *const RMRegisterClasses[] = {
    "GR16", "GR32", "GR64", "GR8",
    "VR128", "FR64", "FR32", "VR64"
  };
  const unsigned NumClasses =
    sizeof(RMRegisterClasses) / sizeof(RMRegisterClasses[0]);

  for (unsigned Idx = 0; Idx != NumClasses; ++Idx)
    if (s == RMRegisterClasses[Idx])
      return ENCODING_RM;

  errs() << "Unhandled R/M register encoding " << s << "\n";
  llvm_unreachable("Unhandled R/M register encoding");
}
|
||||
|
||||
/// roRegisterEncodingFromString - Maps the register class name of a
/// reg/opcode register operand onto its OperandEncoding.  Every recognized
/// register class yields ENCODING_REG; hasOpSizePrefix is not consulted.  An
/// unrecognized name is reported on errs() and is treated as unreachable.
OperandEncoding RecognizableInstr::roRegisterEncodingFromString
  (const std::string &s,
   bool hasOpSizePrefix) {
  static const char *const RegFieldClasses[] = {
    "GR16", "GR32", "GR64", "GR8",
    "VR128", "FR64", "FR32", "VR64",
    "SEGMENT_REG", "DEBUG_REG",
    "CONTROL_REG_32", "CONTROL_REG_64"
  };
  const unsigned NumClasses =
    sizeof(RegFieldClasses) / sizeof(RegFieldClasses[0]);

  for (unsigned Idx = 0; Idx != NumClasses; ++Idx)
    if (s == RegFieldClasses[Idx])
      return ENCODING_REG;

  errs() << "Unhandled reg/opcode register encoding " << s << "\n";
  llvm_unreachable("Unhandled reg/opcode register encoding");
}
|
||||
|
||||
/// memoryEncodingFromString - Maps the type name of a memory operand onto its
/// OperandEncoding.  Every recognized memory operand type yields ENCODING_RM;
/// hasOpSizePrefix is not consulted.  An unrecognized name is reported on
/// errs() and is treated as unreachable.
OperandEncoding RecognizableInstr::memoryEncodingFromString
  (const std::string &s,
   bool hasOpSizePrefix) {
  static const char *const MemoryOperandTypes[] = {
    "i16mem", "i32mem", "i64mem", "i8mem",
    "f128mem", "f64mem", "f32mem", "i128mem", "f80mem",
    "lea32mem", "lea64_32mem", "lea64mem",
    "opaque32mem", "opaque48mem", "opaque80mem", "opaque512mem"
  };
  const unsigned NumTypes =
    sizeof(MemoryOperandTypes) / sizeof(MemoryOperandTypes[0]);

  for (unsigned Idx = 0; Idx != NumTypes; ++Idx)
    if (s == MemoryOperandTypes[Idx])
      return ENCODING_RM;

  errs() << "Unhandled memory encoding " << s << "\n";
  llvm_unreachable("Unhandled memory encoding");
}
|
||||
|
||||
/// relocationEncodingFromString - Maps the type name of a relocation operand
/// (immediates, pc-relative immediates, branch targets and offsets) onto its
/// OperandEncoding.  An unrecognized name is reported on errs() and is treated
/// as unreachable.
OperandEncoding RecognizableInstr::relocationEncodingFromString
  (const std::string &s,
   bool hasOpSizePrefix) {
  // Without an OpSize prefix, a declared 16-bit immediate is special: it is
  // encoded as a fixed 16-bit word.
  if (!hasOpSizePrefix && s == "i16imm")
    return ENCODING_IW;

  // General mapping; the first matching entry wins.
  struct EncodingEntry {
    const char *Name;
    OperandEncoding Encoding;
  };
  static const EncodingEntry Relocations[] = {
    { "i16imm",          ENCODING_Iv },
    { "i16i8imm",        ENCODING_IB },
    { "i32imm",          ENCODING_Iv },
    { "i32i8imm",        ENCODING_IB },
    { "i64i32imm",       ENCODING_ID },
    { "i64i8imm",        ENCODING_IB },
    { "i8imm",           ENCODING_IB },
    { "i64i32imm_pcrel", ENCODING_ID },
    { "i32imm_pcrel",    ENCODING_ID },
    { "brtarget",        ENCODING_Iv },
    { "brtarget8",       ENCODING_IB },
    { "i64imm",          ENCODING_IO },
    { "offset8",         ENCODING_Ia },
    { "offset16",        ENCODING_Ia },
    { "offset32",        ENCODING_Ia },
    { "offset64",        ENCODING_Ia }
  };

  const EncodingEntry *const End =
    Relocations + sizeof(Relocations) / sizeof(Relocations[0]);
  for (const EncodingEntry *Entry = Relocations; Entry != End; ++Entry)
    if (s == Entry->Name)
      return Entry->Encoding;

  errs() << "Unhandled relocation encoding " << s << "\n";
  llvm_unreachable("Unhandled relocation encoding");
}
|
||||
|
||||
/// opcodeModifierEncodingFromString - Maps the register class name of an
/// operand that is carried by an opcode modifier onto its OperandEncoding.
/// hasOpSizePrefix is not consulted.  An unrecognized name is reported on
/// errs() and is treated as unreachable.
OperandEncoding RecognizableInstr::opcodeModifierEncodingFromString
  (const std::string &s,
   bool hasOpSizePrefix) {
  if (s == "RST")
    return ENCODING_I;

  // 16- and 32-bit general-purpose registers share one encoding.
  if (s == "GR32" || s == "GR16")
    return ENCODING_Rv;

  if (s == "GR64")
    return ENCODING_RO;

  if (s == "GR8")
    return ENCODING_RB;

  errs() << "Unhandled opcode modifier encoding " << s << "\n";
  llvm_unreachable("Unhandled opcode modifier encoding");
}
|
||||
#undef ENCODING
|
|
@ -0,0 +1,237 @@
|
|||
//===- X86RecognizableInstr.h - Disassembler instruction spec ----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is part of the X86 Disassembler Emitter.
|
||||
// It contains the interface of a single recognizable instruction.
|
||||
// Documentation for the disassembler emitter in general can be found in
|
||||
// X86DisassemblerEmitter.h.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef X86RECOGNIZABLEINSTR_H
|
||||
#define X86RECOGNIZABLEINSTR_H
|
||||
|
||||
#include "X86DisassemblerTables.h"
|
||||
|
||||
#include "CodeGenTarget.h"
|
||||
#include "Record.h"
|
||||
|
||||
#include "llvm/System/DataTypes.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace X86Disassembler {
|
||||
|
||||
/// RecognizableInstr - Encapsulates all information required to decode a single
|
||||
/// instruction, as extracted from the LLVM instruction tables. Has methods
|
||||
/// to interpret the information available in the LLVM tables, and to emit the
|
||||
/// instruction into DisassemblerTables.
|
||||
class RecognizableInstr {
|
||||
private:
|
||||
/// The opcode of the instruction, as used in an MCInst
|
||||
InstrUID UID;
|
||||
/// The record from the .td files corresponding to this instruction
|
||||
const Record* Rec;
|
||||
/// The prefix field from the record
|
||||
uint8_t Prefix;
|
||||
/// The opcode field from the record; this is the opcode used in the Intel
|
||||
/// encoding and therefore distinct from the UID
|
||||
uint8_t Opcode;
|
||||
/// The form field from the record
|
||||
uint8_t Form;
|
||||
/// The segment override field from the record
|
||||
uint8_t SegOvr;
|
||||
/// The hasOpSizePrefix field from the record
|
||||
bool HasOpSizePrefix;
|
||||
/// The hasREX_WPrefix field from the record
|
||||
bool HasREX_WPrefix;
|
||||
/// The hasLockPrefix field from the record
|
||||
bool HasLockPrefix;
|
||||
/// The isCodeGenOnly field from the record
|
||||
bool IsCodeGenOnly;
|
||||
|
||||
/// The instruction name as listed in the tables
|
||||
std::string Name;
|
||||
/// The AT&T AsmString for the instruction
|
||||
std::string AsmString;
|
||||
|
||||
/// Indicates whether the instruction is SSE
|
||||
bool IsSSE;
|
||||
/// Indicates whether the instruction has FR operands - MOVs with FR operands
|
||||
/// are typically ignored
|
||||
bool HasFROperands;
|
||||
/// Indicates whether the instruction should be emitted into the decode
|
||||
/// tables; regardless, it will be emitted into the instruction info table
|
||||
bool ShouldBeEmitted;
|
||||
|
||||
/// The operands of the instruction, as listed in the CodeGenInstruction.
|
||||
/// They are not one-to-one with operands listed in the MCInst; for example,
|
||||
/// memory operands expand to 5 operands in the MCInst
|
||||
const std::vector<CodeGenInstruction::OperandInfo>* Operands;
|
||||
/// The description of the instruction that is emitted into the instruction
|
||||
/// info table
|
||||
InstructionSpecifier* Spec;
|
||||
|
||||
/// insnContext - Returns the primary context in which the instruction is
|
||||
/// valid.
|
||||
///
|
||||
/// @return - The context in which the instruction is valid.
|
||||
InstructionContext insnContext() const;
|
||||
|
||||
enum filter_ret {
|
||||
FILTER_STRONG, // instruction has no place in the instruction tables
|
||||
FILTER_WEAK, // instruction may conflict, and should be eliminated if
|
||||
// it does
|
||||
FILTER_NORMAL // instruction should have high priority and generate an
|
||||
// error if it conflicts with any other FILTER_NORMAL
|
||||
// instruction
|
||||
};
|
||||
|
||||
/// filter - Determines whether the instruction should be decodable. Some
|
||||
/// instructions are pure intrinsics and use unencodable operands; many
|
||||
/// synthetic instructions are duplicates of other instructions; other
|
||||
/// instructions only differ in the logical way in which they are used, and
|
||||
/// have the same decoding. Because these would cause decode conflicts,
|
||||
/// they must be filtered out.
|
||||
///
|
||||
/// @return - The degree of filtering to be applied (see filter_ret).
|
||||
filter_ret filter() const;
|
||||
|
||||
/// typeFromString - Translates an operand type from the string provided in
|
||||
/// the LLVM tables to an OperandType for use in the operand specifier.
|
||||
///
|
||||
/// @param s - The string, as extracted by calling Rec->getName()
|
||||
/// on a CodeGenInstruction::OperandInfo.
|
||||
/// @param isSSE - Indicates whether the instruction is an SSE
|
||||
/// instruction. For SSE instructions, GR16/GR32/GR64
|
||||
/// operands keep their declared size rather than being
|
||||
/// affected by the mandatory OpSize prefix.
|
||||
/// @param hasREX_WPrefix - Indicates whether the instruction has a REX.W
|
||||
/// prefix. If it does, 32-bit register operands stay
|
||||
/// 32-bit regardless of the operand size.
|
||||
/// @param hasOpSizePrefix- Indicates whether the instruction has an OpSize
|
||||
/// prefix. If it does not, then 16-bit register
|
||||
/// and immediate operands stay 16-bit.
|
||||
/// @return - The operand's type.
|
||||
static OperandType typeFromString(const std::string& s,
|
||||
bool isSSE,
|
||||
|
||||
bool hasREX_WPrefix,
|
||||
bool hasOpSizePrefix);
|
||||
|
||||
/// immediateEncodingFromString - Translates an immediate encoding from the
|
||||
/// string provided in the LLVM tables to an OperandEncoding for use in
|
||||
/// the operand specifier.
|
||||
///
|
||||
/// @param s - See typeFromString().
|
||||
/// @param hasOpSizePrefix - Indicates whether the instruction has an OpSize
|
||||
/// prefix. If it does not, then 16-bit immediate
|
||||
/// operands stay 16-bit.
|
||||
/// @return - The operand's encoding.
|
||||
static OperandEncoding immediateEncodingFromString(const std::string &s,
|
||||
bool hasOpSizePrefix);
|
||||
|
||||
/// rmRegisterEncodingFromString - Like immediateEncodingFromString, but
|
||||
/// handles register operands that are in the R/M field of the ModR/M byte.
|
||||
static OperandEncoding rmRegisterEncodingFromString(const std::string &s,
|
||||
bool hasOpSizePrefix);
|
||||
|
||||
/// roRegisterEncodingFromString - Like immediateEncodingFromString, but
|
||||
/// handles operands that are in the REG field of the ModR/M byte.
|
||||
static OperandEncoding roRegisterEncodingFromString(const std::string &s,
|
||||
bool hasOpSizePrefix);
|
||||
/// memoryEncodingFromString - Like immediateEncodingFromString, but
|
||||
/// handles memory operands.
|
static OperandEncoding memoryEncodingFromString(const std::string &s,
|
||||
bool hasOpSizePrefix);
|
||||
/// relocationEncodingFromString - Like immediateEncodingFromString, but
/// handles relocation operands (immediates, pc-relative immediates, branch
/// targets and offsets).
static OperandEncoding relocationEncodingFromString(const std::string &s,
|
||||
bool hasOpSizePrefix);
|
||||
/// opcodeModifierEncodingFromString - Like immediateEncodingFromString, but
/// handles register operands that are encoded by an opcode modifier.
static OperandEncoding opcodeModifierEncodingFromString(const std::string &s,
|
||||
bool hasOpSizePrefix);
|
||||
|
||||
/// handleOperand - Converts a single operand from the LLVM table format to
|
||||
/// the emitted table format, handling any duplicate operands it encounters
|
||||
/// and then one non-duplicate.
|
||||
///
|
||||
/// @param optional - Determines whether to assert that the
|
||||
/// operand exists.
|
||||
/// @param operandIndex - The index into the generated operand table.
|
||||
/// Incremented by this function one or more
|
||||
/// times to reflect possible duplicate
|
||||
/// operands.
|
||||
/// @param physicalOperandIndex - The index of the current operand into the
|
||||
/// set of non-duplicate ('physical') operands.
|
||||
/// Incremented by this function once.
|
||||
/// @param numPhysicalOperands - The number of non-duplicate operands in the
|
||||
/// instructions.
|
||||
/// @param operandMapping - The operand mapping, which has an entry for
|
||||
/// each operand that indicates whether it is a
|
||||
/// duplicate, and of what.
|
||||
/// @param encodingFromString - Callback with the signature of the
/// *EncodingFromString helpers above; presumably
/// used to translate the operand's type name
/// into its encoding (confirm in the .cpp).
void handleOperand(bool optional,
|
||||
unsigned &operandIndex,
|
||||
unsigned &physicalOperandIndex,
|
||||
unsigned &numPhysicalOperands,
|
||||
unsigned *operandMapping,
|
||||
OperandEncoding (*encodingFromString)
|
||||
(const std::string&,
|
||||
bool hasOpSizePrefix));
|
||||
|
||||
/// shouldBeEmitted - Returns the shouldBeEmitted field. Although filter()
|
||||
/// filters out many instructions, at various points in decoding we
|
||||
/// determine that the instruction should not actually be decodable. In
|
||||
/// particular, MMX MOV instructions aren't emitted, but they're only
|
||||
/// identified during operand parsing.
|
||||
///
|
||||
/// @return - true if at this point we believe the instruction should be
|
||||
/// emitted; false if not. This will return false if filter() returns false
|
||||
/// once emitInstructionSpecifier() has been called.
|
||||
bool shouldBeEmitted() const {
|
||||
return ShouldBeEmitted;
|
||||
}
|
||||
|
||||
/// emitInstructionSpecifier - Loads the instruction specifier for the current
|
||||
/// instruction into a DisassemblerTables.
|
||||
///
|
||||
/// @arg tables - The DisassemblerTables to populate with the specifier for
|
||||
/// the current instruction.
|
||||
void emitInstructionSpecifier(DisassemblerTables &tables);
|
||||
|
||||
/// emitDecodePath - Populates the proper fields in the decode tables
|
||||
/// corresponding to the decode paths for this instruction.
|
||||
///
|
||||
/// @arg tables - The DisassemblerTables to populate with the
|
||||
/// decode information for the current instruction.
|
||||
void emitDecodePath(DisassemblerTables &tables) const;
|
||||
|
||||
/// Constructor - Initializes a RecognizableInstr with the appropriate fields
|
||||
/// from a CodeGenInstruction.
|
||||
///
|
||||
/// @arg tables - The DisassemblerTables that the specifier will be added to.
|
||||
/// @arg insn - The CodeGenInstruction to extract information from.
|
||||
/// @arg uid - The unique ID of the current instruction.
|
||||
RecognizableInstr(DisassemblerTables &tables,
|
||||
const CodeGenInstruction &insn,
|
||||
InstrUID uid);
|
||||
public:
|
||||
/// processInstr - Accepts a CodeGenInstruction and loads decode information
|
||||
/// for it into a DisassemblerTables if appropriate.
|
||||
///
|
||||
/// @arg tables - The DisassemblerTables to be populated with decode
|
||||
/// information.
|
||||
/// @arg insn - The CodeGenInstruction to be used as a source for this
|
||||
/// information.
|
||||
/// @arg uid - The unique ID of the instruction.
|
||||
static void processInstr(DisassemblerTables &tables,
|
||||
const CodeGenInstruction &insn,
|
||||
InstrUID uid);
|
||||
};
|
||||
|
||||
} // namespace X86Disassembler
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue