llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h

/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains common definitions used by both the disassembler and the table
 *  generator.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/

/*
 * This header file provides those definitions that need to be shared between
 * the decoder and the table generator in a C-friendly manner.
 */

#ifndef X86DISASSEMBLERDECODERCOMMON_H
#define X86DISASSEMBLERDECODERCOMMON_H

#include "llvm/System/DataTypes.h"

#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM      x86DisassemblerContexts
#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes

#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR      "x86DisassemblerContexts"
#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"

/*
 * Attributes of an instruction that must be known before the opcode can be
 * processed correctly.  Most of these indicate the presence of particular
 * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
 */
#define ATTRIBUTE_BITS          \
  ENUM_ENTRY(ATTR_NONE,   0x00) \
  ENUM_ENTRY(ATTR_64BIT,  0x01) \
  ENUM_ENTRY(ATTR_XS,     0x02) \
  ENUM_ENTRY(ATTR_XD,     0x04) \
  ENUM_ENTRY(ATTR_REXW,   0x08) \
  ENUM_ENTRY(ATTR_OPSIZE, 0x10)

#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
  ATTRIBUTE_BITS
  ATTR_max
};
#undef ENUM_ENTRY

/*
 * Combinations of the above attributes that are relevant to instruction
 * decode.  Although other combinations are possible, they can be reduced to
 * these without affecting the ultimately decoded instruction.
 */

/*           Class name           Rank  Rationale for rank assignment         */
#define INSTRUCTION_CONTEXTS                                                   \
  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
                                        "64-bit mode but no more")             \
  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
                                        "operands change width")               \
  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
                                        "change width; overrides IC_OPSIZE")   \
  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
                                        "secondary")                           \
  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
                                        "opcode")                              \
  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
                                        "IC_64BIT_REXW_XS")                    \
  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
                                        "else because this changes most "      \
                                        "operands' meaning")

#define ENUM_ENTRY(n, r, d) n,    
typedef enum {
  INSTRUCTION_CONTEXTS
  IC_max
} InstructionContext;
#undef ENUM_ENTRY

/*
 * Opcode types, which determine which decode table to use, both in the Intel
 * manual and also for the decoder.
 */
typedef enum {
  ONEBYTE       = 0,
  TWOBYTE       = 1,
  THREEBYTE_38  = 2,
  THREEBYTE_3A  = 3
} OpcodeType;

/*
 * The following structs are used for the hierarchical decode table.  After
 * determining the instruction's class (i.e., which IC_* constant applies to
 * it), the decoder reads the opcode.  Some instructions require specific
 * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
 *
 * If a ModR/M byte is not required, "required" is left unset, and the values
 * for each instructionID are identical.
 */
 
typedef uint16_t InstrUID;

/*
 * ModRMDecisionType - describes the type of ModR/M decision, allowing the 
 * consumer to determine the number of entries in it.
 *
 * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
 *                  instruction is the same.
 * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
 *                  corresponds to one instruction; otherwise, it corresponds to
 *                  a different instruction.
 * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
 *                  to a different instruction.
 */

#define MODRMTYPES            \
  ENUM_ENTRY(MODRM_ONEENTRY)  \
  ENUM_ENTRY(MODRM_SPLITRM)   \
  ENUM_ENTRY(MODRM_FULL)

#define ENUM_ENTRY(n) n,    
typedef enum {
  MODRMTYPES
  MODRM_max
} ModRMDecisionType;
#undef ENUM_ENTRY

/*
 * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which 
 *  instruction each possible value of the ModR/M byte corresponds to.  Once
 *  this information is known, we have narrowed down to a single instruction.
 */
struct ModRMDecision {
  uint8_t     modrm_type;
  
  /* The macro below must be defined wherever this file is included. */
  INSTRUCTION_IDS
};

/*
 * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
 *   given a particular opcode.
 */
struct OpcodeDecision {
  struct ModRMDecision modRMDecisions[256];
};

/*
 * ContextDecision - Specifies which opcode->instruction tables to look at given
 *   a particular context (set of attributes).  Since there are many possible
 *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
 *   applies given a specific set of attributes.  Hence there are only IC_max
 *   entries in this table, rather than 2^(ATTR_max).
 */
struct ContextDecision {
  struct OpcodeDecision opcodeDecisions[IC_max];
};

/* 
 * Physical encodings of instruction operands.
 */

#define ENCODINGS                                                              \
  ENUM_ENTRY(ENCODING_NONE,   "")                                              \
  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
                              "the opcode byte")                               \
  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
                              "opcode byte")                                   \
                                                                               \
  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
  ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
                              "opcode byte")                                   \
  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
                              "in type")

#define ENUM_ENTRY(n, d) n,    
  typedef enum {
    ENCODINGS
    ENCODING_max
  } OperandEncoding;
#undef ENUM_ENTRY

/* 
 * Semantic interpretations of instruction operands.
 */

#define TYPES                                                                  \
  ENUM_ENTRY(TYPE_NONE,       "")                                              \
  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
  ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
                              "base)")                                         \
  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEE754 memory floating-point operand")   \
  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
                              "floating-point instructions")                   \
  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
  ENUM_ENTRY(TYPE_CR32,       "4-byte control register operand")               \
  ENUM_ENTRY(TYPE_CR64,       "8-byte")                                        \
                                                                               \
  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
  ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
  ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state")

#define ENUM_ENTRY(n, d) n,    
typedef enum {
  TYPES
  TYPE_max
} OperandType;
#undef ENUM_ENTRY

/* 
 * OperandSpecifier - The specification for how to extract and interpret one
 *   operand.
 */
struct OperandSpecifier {
  OperandEncoding  encoding;
  OperandType      type;
};

/*
 * Indicates where the opcode modifier (if any) is to be found.  Extended
 * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
 */

#define MODIFIER_TYPES        \
  ENUM_ENTRY(MODIFIER_NONE)   \
  ENUM_ENTRY(MODIFIER_OPCODE) \
  ENUM_ENTRY(MODIFIER_MODRM)

#define ENUM_ENTRY(n) n,
typedef enum {
  MODIFIER_TYPES
  MODIFIER_max
} ModifierType;
#undef ENUM_ENTRY

#define X86_MAX_OPERANDS 5

/*
 * The specification for how to extract and interpret a full instruction and
 * its operands.
 */
struct InstructionSpecifier {
  ModifierType modifierType;
  uint8_t modifierBase;
  struct OperandSpecifier operands[X86_MAX_OPERANDS];
  
  /* The macro below must be defined wherever this file is included. */
  INSTRUCTION_SPECIFIER_FIELDS
};

/*
 * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
 * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
 * respectively.
 */
typedef enum {
  MODE_16BIT,
  MODE_32BIT,
  MODE_64BIT
} DisassemblerMode;

#endif
Table-driven disassembler for the X86 architecture (16-, 32-, and 64-bit incarnations), integrated into the MC framework. The disassembler is table-driven, using a custom TableGen backend to generate hierarchical tables optimized for fast decode. The disassembler consumes MemoryObjects and produces arrays of MCInsts, adhering to the abstract base class MCDisassembler (llvm/MC/MCDisassembler.h). The disassembler is documented in detail in - lib/Target/X86/Disassembler/X86Disassembler.cpp (disassembler runtime) - utils/TableGen/DisassemblerEmitter.cpp (table emitter) You can test the disassembler by running llvm-mc -disassemble for i386 or x86_64 targets. Please let me know if you encounter any problems with it. llvm-svn: 91749 2009-12-19 10:59:52 +08:00			`/===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------- C --==`
			`*`
			`* The LLVM Compiler Infrastructure`
			`*`
			`* This file is distributed under the University of Illinois Open Source`
			`* License. See LICENSE.TXT for details.`
			`*`
			`===----------------------------------------------------------------------===`
			`*`
			`* This file is part of the X86 Disassembler.`
			`* It contains common definitions used by both the disassembler and the table`
			`* generator.`
			`* Documentation for the disassembler can be found in X86Disassembler.h.`
			`*`
			`===----------------------------------------------------------------------===/`

			`/*`
			`* This header file provides those definitions that need to be shared between`
			`* the decoder and the table generator in a C-friendly manner.`
			`*/`

			`#ifndef X86DISASSEMBLERDECODERCOMMON_H`
			`#define X86DISASSEMBLERDECODERCOMMON_H`

			`#include "llvm/System/DataTypes.h"`

			`#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers`
			`#define CONTEXTS_SYM x86DisassemblerContexts`
			`#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes`
			`#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes`
			`#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes`
			`#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes`

			`#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"`
			`#define CONTEXTS_STR "x86DisassemblerContexts"`
			`#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"`
			`#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"`
			`#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"`
			`#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"`

			`/*`
			`* Attributes of an instruction that must be known before the opcode can be`
			`* processed correctly. Most of these indicate the presence of particular`
			`* prefixes, but ATTR_64BIT is simply an attribute of the decoding context.`
			`*/`
			`#define ATTRIBUTE_BITS \`
			`ENUM_ENTRY(ATTR_NONE, 0x00) \`
			`ENUM_ENTRY(ATTR_64BIT, 0x01) \`
			`ENUM_ENTRY(ATTR_XS, 0x02) \`
			`ENUM_ENTRY(ATTR_XD, 0x04) \`
			`ENUM_ENTRY(ATTR_REXW, 0x08) \`
			`ENUM_ENTRY(ATTR_OPSIZE, 0x10)`

			`#define ENUM_ENTRY(n, v) n = v,`
			`enum attributeBits {`
			`ATTRIBUTE_BITS`
			`ATTR_max`
			`};`
			`#undef ENUM_ENTRY`

			`/*`
			`* Combinations of the above attributes that are relevant to instruction`
			`* decode. Although other combinations are possible, they can be reduced to`
			`* these without affecting the ultimately decoded instruction.`
			`*/`

			`/* Class name Rank Rationale for rank assignment */`
			`#define INSTRUCTION_CONTEXTS \`
			`ENUM_ENTRY(IC, 0, "says nothing about the instruction") \`
			`ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \`
			`"64-bit mode but no more") \`
			`ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \`
			`"operands change width") \`
			`ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \`
			`"but not the operands") \`
			`ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \`
			`"but not the operands") \`
			`ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\`
			`"change width; overrides IC_OPSIZE") \`
			`ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \`
			`ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \`
			`"secondary") \`
			`ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \`
			`ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \`
			`"opcode") \`
			`ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \`
			`"IC_64BIT_REXW_XS") \`
			`ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \`
			`"else because this changes most " \`
			`"operands' meaning")`

			`#define ENUM_ENTRY(n, r, d) n,`
			`typedef enum {`
			`INSTRUCTION_CONTEXTS`
			`IC_max`
			`} InstructionContext;`
			`#undef ENUM_ENTRY`

			`/*`
			`* Opcode types, which determine which decode table to use, both in the Intel`
			`* manual and also for the decoder.`
			`*/`
			`typedef enum {`
			`ONEBYTE = 0,`
			`TWOBYTE = 1,`
			`THREEBYTE_38 = 2,`
			`THREEBYTE_3A = 3`
			`} OpcodeType;`

			`/*`
			`* The following structs are used for the hierarchical decode table. After`
			`* determining the instruction's class (i.e., which IC_* constant applies to`
			`* it), the decoder reads the opcode. Some instructions require specific`
			`* values of the ModR/M byte, so the ModR/M byte indexes into the final table.`
			`*`
			`* If a ModR/M byte is not required, "required" is left unset, and the values`
			`* for each instructionID are identical.`
			`*/`

			`typedef uint16_t InstrUID;`

			`/*`
			`* ModRMDecisionType - describes the type of ModR/M decision, allowing the`
			`* consumer to determine the number of entries in it.`
			`*`
			`* MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded`
			`* instruction is the same.`
			`* MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode`
			`* corresponds to one instruction; otherwise, it corresponds to`
			`* a different instruction.`
			`* MODRM_FULL - Potentially, each value of the ModR/M byte could correspond`
			`* to a different instruction.`
			`*/`

			`#define MODRMTYPES \`
			`ENUM_ENTRY(MODRM_ONEENTRY) \`
			`ENUM_ENTRY(MODRM_SPLITRM) \`
			`ENUM_ENTRY(MODRM_FULL)`

			`#define ENUM_ENTRY(n) n,`
			`typedef enum {`
			`MODRMTYPES`
			`MODRM_max`
			`} ModRMDecisionType;`
			`#undef ENUM_ENTRY`

			`/*`
			`* ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which`
			`* instruction each possible value of the ModR/M byte corresponds to. Once`
			`* this information is known, we have narrowed down to a single instruction.`
			`*/`
			`struct ModRMDecision {`
			`uint8_t modrm_type;`

			`/* The macro below must be defined wherever this file is included. */`
			`INSTRUCTION_IDS`
			`};`

			`/*`
			`* OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at`
			`* given a particular opcode.`
			`*/`
			`struct OpcodeDecision {`
			`struct ModRMDecision modRMDecisions[256];`
			`};`

			`/*`
			`* ContextDecision - Specifies which opcode->instruction tables to look at given`
			`* a particular context (set of attributes). Since there are many possible`
			`* contexts, the decoder first uses CONTEXTS_SYM to determine which context`
			`* applies given a specific set of attributes. Hence there are only IC_max`
			`* entries in this table, rather than 2^(ATTR_max).`
			`*/`
			`struct ContextDecision {`
			`struct OpcodeDecision opcodeDecisions[IC_max];`
			`};`

			`/*`
			`* Physical encodings of instruction operands.`
			`*/`

			`#define ENCODINGS \`
			`ENUM_ENTRY(ENCODING_NONE, "") \`
			`ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \`
			`ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \`
			`ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \`
			`ENUM_ENTRY(ENCODING_CW, "2-byte") \`
			`ENUM_ENTRY(ENCODING_CD, "4-byte") \`
			`ENUM_ENTRY(ENCODING_CP, "6-byte") \`
			`ENUM_ENTRY(ENCODING_CO, "8-byte") \`
			`ENUM_ENTRY(ENCODING_CT, "10-byte") \`
			`ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \`
			`ENUM_ENTRY(ENCODING_IW, "2-byte") \`
			`ENUM_ENTRY(ENCODING_ID, "4-byte") \`
			`ENUM_ENTRY(ENCODING_IO, "8-byte") \`
			`ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \`
			`"the opcode byte") \`
			`ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \`
			`ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \`
			`ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \`
			`ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \`
			`"opcode byte") \`
			`\`
			`ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \`
			`ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \`
			`ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \`
			`"opcode byte") \`
			`ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \`
			`"in type")`

			`#define ENUM_ENTRY(n, d) n,`
			`typedef enum {`
			`ENCODINGS`
			`ENCODING_max`
			`} OperandEncoding;`
			`#undef ENUM_ENTRY`

			`/*`
			`* Semantic interpretations of instruction operands.`
			`*/`

			`#define TYPES \`
			`ENUM_ENTRY(TYPE_NONE, "") \`
			`ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \`
			`ENUM_ENTRY(TYPE_REL16, "2-byte") \`
			`ENUM_ENTRY(TYPE_REL32, "4-byte") \`
			`ENUM_ENTRY(TYPE_REL64, "8-byte") \`
			`ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \`
			`ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \`
			`ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \`
			`ENUM_ENTRY(TYPE_R8, "1-byte register operand") \`
			`ENUM_ENTRY(TYPE_R16, "2-byte") \`
			`ENUM_ENTRY(TYPE_R32, "4-byte") \`
			`ENUM_ENTRY(TYPE_R64, "8-byte") \`
			`ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \`
			`ENUM_ENTRY(TYPE_IMM16, "2-byte") \`
			`ENUM_ENTRY(TYPE_IMM32, "4-byte") \`
			`ENUM_ENTRY(TYPE_IMM64, "8-byte") \`
			`ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \`
			`ENUM_ENTRY(TYPE_RM16, "2-byte") \`
			`ENUM_ENTRY(TYPE_RM32, "4-byte") \`
			`ENUM_ENTRY(TYPE_RM64, "8-byte") \`
			`ENUM_ENTRY(TYPE_M, "Memory operand") \`
			`ENUM_ENTRY(TYPE_M8, "1-byte") \`
			`ENUM_ENTRY(TYPE_M16, "2-byte") \`
			`ENUM_ENTRY(TYPE_M32, "4-byte") \`
			`ENUM_ENTRY(TYPE_M64, "8-byte") \`
Fixes to the X86 disassembler: Made LEA memory operands emit only 4 MCInst operands. Made the scale operand equal 1 for instructions that have no SIB byte. llvm-svn: 91919 2009-12-23 05:12:55 +08:00			`ENUM_ENTRY(TYPE_LEA, "Effective address") \`
Table-driven disassembler for the X86 architecture (16-, 32-, and 64-bit incarnations), integrated into the MC framework. The disassembler is table-driven, using a custom TableGen backend to generate hierarchical tables optimized for fast decode. The disassembler consumes MemoryObjects and produces arrays of MCInsts, adhering to the abstract base class MCDisassembler (llvm/MC/MCDisassembler.h). The disassembler is documented in detail in - lib/Target/X86/Disassembler/X86Disassembler.cpp (disassembler runtime) - utils/TableGen/DisassemblerEmitter.cpp (table emitter) You can test the disassembler by running llvm-mc -disassemble for i386 or x86_64 targets. Please let me know if you encounter any problems with it. llvm-svn: 91749 2009-12-19 10:59:52 +08:00			`ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \`
			`ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \`
			`ENUM_ENTRY(TYPE_M1632, "2+4-byte") \`
			`ENUM_ENTRY(TYPE_M1664, "2+8-byte") \`
			`ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \`
			`ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \`
			`ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \`
			`ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \`
			`ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \`
			`"base)") \`
			`ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \`
			`ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \`
			`ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \`
			`ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \`
			`"2 = SS, 3 = DS, 4 = FS, 5 = GS") \`
			`ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \`
			`ENUM_ENTRY(TYPE_M64FP, "64-bit") \`
			`ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \`
			`ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \`
			`"floating-point instructions") \`
			`ENUM_ENTRY(TYPE_M32INT, "4-byte") \`
			`ENUM_ENTRY(TYPE_M64INT, "8-byte") \`
			`ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \`
			`ENUM_ENTRY(TYPE_MM, "MMX register operand") \`
			`ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \`
			`ENUM_ENTRY(TYPE_MM64, "8-byte") \`
			`ENUM_ENTRY(TYPE_XMM, "XMM register operand") \`
			`ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \`
			`ENUM_ENTRY(TYPE_XMM64, "8-byte") \`
			`ENUM_ENTRY(TYPE_XMM128, "16-byte") \`
			`ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \`
			`ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \`
			`ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \`
			`ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \`
			`ENUM_ENTRY(TYPE_CR64, "8-byte") \`
			`\`
			`ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \`
			`ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \`
			`ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \`
			`ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \`
			`ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \`
			`ENUM_ENTRY(TYPE_DUP1, "operand 1") \`
			`ENUM_ENTRY(TYPE_DUP2, "operand 2") \`
			`ENUM_ENTRY(TYPE_DUP3, "operand 3") \`
			`ENUM_ENTRY(TYPE_DUP4, "operand 4") \`
			`ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")`

			`#define ENUM_ENTRY(n, d) n,`
			`typedef enum {`
			`TYPES`
			`TYPE_max`
			`} OperandType;`
			`#undef ENUM_ENTRY`

			`/*`
			`* OperandSpecifier - The specification for how to extract and interpret one`
			`* operand.`
			`*/`
			`struct OperandSpecifier {`
			`OperandEncoding encoding;`
			`OperandType type;`
			`};`

			`/*`
			`* Indicates where the opcode modifier (if any) is to be found. Extended`
			`* opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.`
			`*/`

			`#define MODIFIER_TYPES \`
			`ENUM_ENTRY(MODIFIER_NONE) \`
			`ENUM_ENTRY(MODIFIER_OPCODE) \`
			`ENUM_ENTRY(MODIFIER_MODRM)`

			`#define ENUM_ENTRY(n) n,`
			`typedef enum {`
			`MODIFIER_TYPES`
			`MODIFIER_max`
			`} ModifierType;`
			`#undef ENUM_ENTRY`

			`#define X86_MAX_OPERANDS 5`

			`/*`
			`* The specification for how to extract and interpret a full instruction and`
			`* its operands.`
			`*/`
			`struct InstructionSpecifier {`
			`ModifierType modifierType;`
			`uint8_t modifierBase;`
			`struct OperandSpecifier operands[X86_MAX_OPERANDS];`

			`/* The macro below must be defined wherever this file is included. */`
			`INSTRUCTION_SPECIFIER_FIELDS`
			`};`

			`/*`
			`* Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode`
			`* are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,`
			`* respectively.`
			`*/`
			`typedef enum {`
			`MODE_16BIT,`
			`MODE_32BIT,`
			`MODE_64BIT`
			`} DisassemblerMode;`

			`#endif`