X86 table-generator and disassembler support for the AVX

instruction set.  This code adds support for the VEX prefix
and for the YMM registers accessible on AVX-enabled
architectures.  Instruction table support that enables AVX
instructions for the disassembler is in an upcoming patch.

llvm-svn: 127644
This commit is contained in:
Sean Callanan 2011-03-15 01:23:15 +00:00
parent a34f1b1f10
commit c3fd523731
7 changed files with 570 additions and 95 deletions

View File

@ -409,6 +409,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM32:
case TYPE_XMM64:
case TYPE_XMM128:
case TYPE_XMM256:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
return translateRMRegister(mcInst, insn);
@ -418,6 +419,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_M32:
case TYPE_M64:
case TYPE_M128:
case TYPE_M256:
case TYPE_M512:
case TYPE_Mv:
case TYPE_M32FP:
@ -500,6 +502,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_Rv:
translateRegister(mcInst, insn.opcodeRegister);
return false;
case ENCODING_VVVV:
translateRegister(mcInst, insn.vvvv);
return false;
case ENCODING_DUP:
return translateOperand(mcInst,
insn.spec->operands[operand.type - TYPE_DUP0],

View File

@ -368,29 +368,109 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (isPrefix)
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
insn->vexSize = 0;
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
if (byte == 0xc4) {
uint8_t byte1;
if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
dbgprintf(insn, "Redundant REX prefix");
return -1;
}
insn->rexPrefix = byte;
insn->necessaryPrefixLocation = insn->readerCursor - 2;
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
} else {
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
if (insn->mode == MODE_64BIT || byte1 & 0x8) {
insn->vexSize = 3;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
} else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
if (insn->vexSize == 3) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
consumeByte(insn, &insn->vexPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
insn->rexPrefix = 0x40
| (wFromVEX3of3(insn->vexPrefix[2]) << 3)
| (rFromVEX2of3(insn->vexPrefix[1]) << 2)
| (xFromVEX2of3(insn->vexPrefix[1]) << 1)
| (bFromVEX2of3(insn->vexPrefix[1]) << 0);
switch (ppFromVEX3of3(insn->vexPrefix[2]))
{
default:
break;
case VEX_PREFIX_66:
hasOpSize = TRUE;
break;
}
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
}
}
else if (byte == 0xc5) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
if (insn->mode == MODE_64BIT || byte1 & 0x8) {
insn->vexSize = 2;
}
else {
unconsumeByte(insn);
}
if (insn->vexSize == 2) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
insn->rexPrefix = 0x40
| (rFromVEX2of2(insn->vexPrefix[1]) << 2);
switch (ppFromVEX2of2(insn->vexPrefix[1]))
{
default:
break;
case VEX_PREFIX_66:
hasOpSize = TRUE;
break;
}
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
}
}
else {
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
dbgprintf(insn, "Redundant REX prefix");
return -1;
}
insn->rexPrefix = byte;
insn->necessaryPrefixLocation = insn->readerCursor - 2;
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
} else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
} else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
}
if (insn->mode == MODE_16BIT) {
insn->registerSize = (hasOpSize ? 4 : 2);
insn->addressSize = (hasAdSize ? 4 : 2);
@ -438,6 +518,39 @@ static int readOpcode(struct InternalInstruction* insn) {
dbgprintf(insn, "readOpcode()");
insn->opcodeType = ONEBYTE;
if (insn->vexSize == 3)
{
switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
{
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
return -1;
case 0:
break;
case VEX_LOB_0F:
insn->twoByteEscape = 0x0f;
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F38:
insn->twoByteEscape = 0x0f;
insn->threeByteEscape = 0x38;
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F3A:
insn->twoByteEscape = 0x0f;
insn->threeByteEscape = 0x3a;
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
}
else if (insn->vexSize == 2)
{
insn->twoByteEscape = 0x0f;
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
}
if (consumeByte(insn, &current))
return -1;
@ -600,20 +713,64 @@ static int getID(struct InternalInstruction* insn) {
dbgprintf(insn, "getID()");
attrMask = ATTR_NONE;
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
if (insn->vexSize) {
attrMask |= ATTR_VEX;
if (insn->vexSize == 3) {
switch (ppFromVEX3of3(insn->vexPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
break;
case VEX_PREFIX_F2:
attrMask |= ATTR_XD;
break;
}
if (wFromVEX3of3(insn->vexPrefix[2]))
attrMask |= ATTR_REXW;
if (lFromVEX3of3(insn->vexPrefix[2]))
attrMask |= ATTR_VEXL;
}
else if (insn->vexSize == 2) {
switch (ppFromVEX2of2(insn->vexPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
break;
case VEX_PREFIX_F2:
attrMask |= ATTR_XD;
break;
}
if (lFromVEX2of2(insn->vexPrefix[1]))
attrMask |= ATTR_VEXL;
}
else {
return -1;
}
}
else {
if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW;
if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW;
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
attrMask |= ATTR_XD;
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
attrMask |= ATTR_XD;
}
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
@ -1012,6 +1169,8 @@ static int readModRM(struct InternalInstruction* insn) {
return prefix##_EAX + index; \
case TYPE_R64: \
return prefix##_RAX + index; \
case TYPE_XMM256: \
return prefix##_YMM0 + index; \
case TYPE_XMM128: \
case TYPE_XMM64: \
case TYPE_XMM32: \
@ -1073,6 +1232,14 @@ static int fixupReg(struct InternalInstruction *insn,
default:
debug("Expected a REG or R/M encoding in fixupReg");
return -1;
case ENCODING_VVVV:
insn->vvvv = (Reg)fixupRegValue(insn,
(OperandType)op->type,
insn->vvvv,
&valid);
if (!valid)
return -1;
break;
case ENCODING_REG:
insn->reg = (Reg)fixupRegValue(insn,
(OperandType)op->type,
@ -1236,6 +1403,27 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
return 0;
}
/*
 * readVVVV - Extracts the VEX.vvvv field from the VEX prefix bytes stored in
 *   the instruction and records it in insn->vvvv.  No bytes are consumed from
 *   the instruction stream.
 *
 * @param insn  - The instruction whose stored VEX prefix is examined.
 * @return      - 0 if the instruction has a two- or three-byte VEX prefix and
 *                vvvv was recorded; -1 if it has no VEX prefix.
 */
static int readVVVV(struct InternalInstruction* insn) {
  dbgprintf(insn, "readVVVV()");

  switch (insn->vexSize) {
  case 3:
    /* Three-byte form: vvvv is carried by the last prefix byte. */
    insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
    return 0;
  case 2:
    /* Two-byte form: vvvv is carried by the second prefix byte. */
    insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
    return 0;
  default:
    /* No VEX prefix, so there is no vvvv field to read. */
    return -1;
  }
}
/*
* readOperands - Consults the specifier for an instruction and consumes all
* operands for that instruction, interpreting them as it goes.
@ -1317,6 +1505,13 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_I:
if (readOpcodeModifier(insn))
return -1;
break;
case ENCODING_VVVV:
if (readVVVV(insn))
return -1;
if (fixupReg(insn, &insn->spec->operands[index]))
return -1;
break;
case ENCODING_DUP:
break;
default:

View File

@ -34,16 +34,30 @@ extern "C" {
/*
* Accessor functions for various fields of an Intel instruction
*/
#define modFromModRM(modRM) ((modRM & 0xc0) >> 6)
#define regFromModRM(modRM) ((modRM & 0x38) >> 3)
#define rmFromModRM(modRM) (modRM & 0x7)
#define scaleFromSIB(sib) ((sib & 0xc0) >> 6)
#define indexFromSIB(sib) ((sib & 0x38) >> 3)
#define baseFromSIB(sib) (sib & 0x7)
#define wFromREX(rex) ((rex & 0x8) >> 3)
#define rFromREX(rex) ((rex & 0x4) >> 2)
#define xFromREX(rex) ((rex & 0x2) >> 1)
#define bFromREX(rex) (rex & 0x1)
#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
#define rmFromModRM(modRM) ((modRM) & 0x7)
#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
#define baseFromSIB(sib) ((sib) & 0x7)
#define wFromREX(rex) (((rex) & 0x8) >> 3)
#define rFromREX(rex) (((rex) & 0x4) >> 2)
#define xFromREX(rex) (((rex) & 0x2) >> 1)
#define bFromREX(rex) ((rex) & 0x1)
/*
 * Accessors for the fields of a VEX prefix.  In a name of the form xFromVEXNofM,
 * the argument is byte N of an M-byte VEX prefix.  The r, x, b, and vvvv
 * accessors complement the byte before masking, so they return the inverse of
 * the bits as they appear in the prefix byte.
 */
/* Second byte of a three-byte VEX prefix */
#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
/* Third byte of a three-byte VEX prefix */
#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
#define ppFromVEX3of3(vex) ((vex) & 0x3)
/* Second byte of a two-byte VEX prefix */
#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
#define ppFromVEX2of2(vex) ((vex) & 0x3)
/*
* These enums represent Intel registers for use by the decoder.
@ -206,7 +220,25 @@ extern "C" {
ENTRY(XMM13) \
ENTRY(XMM14) \
ENTRY(XMM15)
/* The sixteen YMM registers, YMM0 through YMM15, each passed to ENTRY(). */
#define REGS_YMM \
ENTRY(YMM0) \
ENTRY(YMM1) \
ENTRY(YMM2) \
ENTRY(YMM3) \
ENTRY(YMM4) \
ENTRY(YMM5) \
ENTRY(YMM6) \
ENTRY(YMM7) \
ENTRY(YMM8) \
ENTRY(YMM9) \
ENTRY(YMM10) \
ENTRY(YMM11) \
ENTRY(YMM12) \
ENTRY(YMM13) \
ENTRY(YMM14) \
ENTRY(YMM15)
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@ -252,6 +284,7 @@ extern "C" {
REGS_64BIT \
REGS_MMX \
REGS_XMM \
REGS_YMM \
REGS_SEGMENT \
REGS_DEBUG \
REGS_CONTROL \
@ -332,6 +365,27 @@ typedef enum {
SEG_OVERRIDE_GS,
SEG_OVERRIDE_max
} SegmentOverride;
/*
* VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field.  Each value
* stands in for the escape byte(s) that would otherwise precede the opcode.
*/
typedef enum {
VEX_LOB_0F = 0x1, /* 0x0f escape (two-byte opcode) */
VEX_LOB_0F38 = 0x2, /* 0x0f 0x38 escape (three-byte opcode) */
VEX_LOB_0F3A = 0x3 /* 0x0f 0x3a escape (three-byte opcode) */
} VEXLeadingOpcodeByte;
/*
* VEXPrefixCode - Possible values for the VEX.pp field.  Each non-zero value
* stands in for the legacy prefix byte named in the enumerator.
*/
typedef enum {
VEX_PREFIX_NONE = 0x0, /* no implied prefix */
VEX_PREFIX_66 = 0x1, /* implies 0x66 (operand-size) */
VEX_PREFIX_F3 = 0x2, /* implies 0xf3 (XS) */
VEX_PREFIX_F2 = 0x3 /* implies 0xf2 (XD) */
} VEXPrefixCode;
typedef uint8_t BOOL;
@ -389,10 +443,12 @@ struct InternalInstruction {
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
uint64_t prefixLocations[0x100];
/* The value of the VEX prefix, if present */
uint8_t vexPrefix[3];
/* The length of the VEX prefix (0 if not present) */
uint8_t vexSize;
/* The value of the REX prefix, if present */
uint8_t rexPrefix;
/* The location of the REX prefix */
uint64_t rexLocation;
/* The location where a mandatory prefix would have to be (i.e., right before
the opcode, or right before the REX prefix if one is present) */
uint64_t necessaryPrefixLocation;
@ -428,6 +484,10 @@ struct InternalInstruction {
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
need to be consumed again */
/* The VEX.vvvv field, which contains a third register operand for some AVX
instructions */
Reg vvvv;
/* The ModR/M byte, which contains most register operands and some portion of
all memory operands */

View File

@ -49,7 +49,9 @@
ENUM_ENTRY(ATTR_XS, 0x02) \
ENUM_ENTRY(ATTR_XD, 0x04) \
ENUM_ENTRY(ATTR_REXW, 0x08) \
ENUM_ENTRY(ATTR_OPSIZE, 0x10)
ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
ENUM_ENTRY(ATTR_VEX, 0x20) \
ENUM_ENTRY(ATTR_VEXL, 0x40)
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
@ -87,7 +89,20 @@ enum attributeBits {
"IC_64BIT_REXW_XS") \
ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
"else because this changes most " \
"operands' meaning")
"operands' meaning") \
ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
@ -183,6 +198,7 @@ struct ContextDecision {
ENUM_ENTRY(ENCODING_NONE, "") \
ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
ENUM_ENTRY(ENCODING_CW, "2-byte") \
ENUM_ENTRY(ENCODING_CD, "4-byte") \
@ -278,6 +294,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
ENUM_ENTRY(TYPE_XMM64, "8-byte") \
ENUM_ENTRY(TYPE_XMM128, "16-byte") \
ENUM_ENTRY(TYPE_XMM256, "32-byte") \
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \

View File

@ -46,9 +46,11 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_OPSIZE:
return(inheritsFrom(child, IC_64BIT_OPSIZE));
case IC_XD:
return(inheritsFrom(child, IC_64BIT_XD));
return(inheritsFrom(child, IC_64BIT_XD) ||
inheritsFrom(child, IC_VEX_XD));
case IC_XS:
return(inheritsFrom(child, IC_64BIT_XS));
return(inheritsFrom(child, IC_64BIT_XS) ||
inheritsFrom(child, IC_VEX_XS));
case IC_64BIT_REXW:
return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
inheritsFrom(child, IC_64BIT_REXW_XD) ||
@ -65,6 +67,35 @@ static inline bool inheritsFrom(InstructionContext child,
return false;
case IC_64BIT_REXW_OPSIZE:
return false;
case IC_VEX:
return(inheritsFrom(child, IC_VEX_XS) ||
inheritsFrom(child, IC_VEX_XD) ||
inheritsFrom(child, IC_VEX_L) ||
inheritsFrom(child, IC_VEX_W) ||
inheritsFrom(child, IC_VEX_OPSIZE));
case IC_VEX_XS:
return(inheritsFrom(child, IC_VEX_L_XS) ||
inheritsFrom(child, IC_VEX_W_XS));
case IC_VEX_XD:
return(inheritsFrom(child, IC_VEX_L_XD) ||
inheritsFrom(child, IC_VEX_W_XD));
case IC_VEX_L:
return(inheritsFrom(child, IC_VEX_L_XS) ||
inheritsFrom(child, IC_VEX_L_XD));
case IC_VEX_L_XS:
return false;
case IC_VEX_L_XD:
return false;
case IC_VEX_W:
return(inheritsFrom(child, IC_VEX_W_XS) ||
inheritsFrom(child, IC_VEX_W_XD) ||
inheritsFrom(child, IC_VEX_W_OPSIZE));
case IC_VEX_W_XS:
return false;
case IC_VEX_W_XD:
return false;
case IC_VEX_OPSIZE:
return inheritsFrom(child, IC_VEX_W_OPSIZE);
default:
return false;
}
@ -461,7 +492,29 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
for (index = 0; index < 256; ++index) {
o.indent(i * 2);
if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
o << "IC_VEX_L_OPSIZE";
else if ((index & ATTR_VEXL) && (index & ATTR_XD))
o << "IC_VEX_L_XD";
else if ((index & ATTR_VEXL) && (index & ATTR_XS))
o << "IC_VEX_L_XS";
else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
o << "IC_VEX_W_OPSIZE";
else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD))
o << "IC_VEX_W_XD";
else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS))
o << "IC_VEX_W_XS";
else if (index & ATTR_VEXL)
o << "IC_VEX_L";
else if ((index & ATTR_VEX) && (index & ATTR_REXW))
o << "IC_VEX_W";
else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE))
o << "IC_VEX_OPSIZE";
else if ((index & ATTR_VEX) && (index & ATTR_XD))
o << "IC_VEX_XD";
else if ((index & ATTR_VEX) && (index & ATTR_XS))
o << "IC_VEX_XS";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
o << "IC_64BIT_REXW_XS";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
o << "IC_64BIT_REXW_XD";
@ -484,6 +537,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
o << "IC_XD";
else if (index & ATTR_OPSIZE)
o << "IC_OPSIZE";
else if (index & ATTR_VEX)
o << "IC_VEX";
else
o << "IC";

View File

@ -214,7 +214,9 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
HasOpSizePrefix = Rec->getValueAsBit("hasOpSizePrefix");
HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix");
HasVEXPrefix = Rec->getValueAsBit("hasVEXPrefix");
HasVEX_4VPrefix = Rec->getValueAsBit("hasVEX_4VPrefix");
HasVEX_WPrefix = Rec->getValueAsBit("hasVEX_WPrefix");
HasLockPrefix = Rec->getValueAsBit("hasLockPrefix");
IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
@ -224,7 +226,8 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
Operands = &insn.Operands.OperandList;
IsSSE = HasOpSizePrefix && (Name.find("16") == Name.npos);
HasFROperands = false;
HasFROperands = hasFROperands();
HasVEX_LPrefix = has256BitOperands() || Rec->getValueAsBit("hasVEX_L");
ShouldBeEmitted = true;
}
@ -248,7 +251,32 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables,
InstructionContext RecognizableInstr::insnContext() const {
InstructionContext insnContext;
if (Name.find("64") != Name.npos || HasREX_WPrefix) {
if (HasVEX_4VPrefix || HasVEXPrefix) {
if (HasOpSizePrefix && HasVEX_LPrefix)
insnContext = IC_VEX_L_OPSIZE;
else if (HasOpSizePrefix && HasVEX_WPrefix)
insnContext = IC_VEX_W_OPSIZE;
else if (HasOpSizePrefix)
insnContext = IC_VEX_OPSIZE;
else if (HasVEX_LPrefix && Prefix == X86Local::XS)
insnContext = IC_VEX_L_XS;
else if (HasVEX_LPrefix && Prefix == X86Local::XD)
insnContext = IC_VEX_L_XD;
else if (HasVEX_WPrefix && Prefix == X86Local::XS)
insnContext = IC_VEX_W_XS;
else if (HasVEX_WPrefix && Prefix == X86Local::XD)
insnContext = IC_VEX_W_XD;
else if (HasVEX_WPrefix)
insnContext = IC_VEX_W;
else if (HasVEX_LPrefix)
insnContext = IC_VEX_L;
else if (Prefix == X86Local::XD)
insnContext = IC_VEX_XD;
else if (Prefix == X86Local::XS)
insnContext = IC_VEX_XS;
else
insnContext = IC_VEX;
} else if (Name.find("64") != Name.npos || HasREX_WPrefix) {
if (HasREX_WPrefix && HasOpSizePrefix)
insnContext = IC_64BIT_REXW_OPSIZE;
else if (HasOpSizePrefix)
@ -280,6 +308,10 @@ InstructionContext RecognizableInstr::insnContext() const {
}
RecognizableInstr::filter_ret RecognizableInstr::filter() const {
///////////////////
// FILTER_STRONG
//
// Filter out intrinsics
if (!Rec->isSubClassOf("X86Inst"))
@ -291,26 +323,71 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
if (Form == X86Local::MRMInitReg)
return FILTER_STRONG;
// TEMPORARY pending bug fixes
if (Name.find("VMOVDQU") != Name.npos ||
Name.find("VMOVDQA") != Name.npos ||
Name.find("VROUND") != Name.npos)
return FILTER_STRONG;
// Filter out artificial instructions
if (Name.find("TAILJMP") != Name.npos ||
Name.find("_Int") != Name.npos ||
Name.find("_int") != Name.npos ||
Name.find("Int_") != Name.npos ||
Name.find("_NOREX") != Name.npos ||
Name.find("_TC") != Name.npos ||
Name.find("EH_RETURN") != Name.npos ||
Name.find("V_SET") != Name.npos ||
Name.find("LOCK_") != Name.npos ||
Name.find("WIN") != Name.npos ||
Name.find("_AVX") != Name.npos ||
Name.find("2SDL") != Name.npos)
return FILTER_STRONG;
// Filter out instructions with segment override prefixes.
// They're too messy to handle now and we'll special case them if needed.
if (SegOvr)
return FILTER_STRONG;
// Filter out instructions that can't be printed.
if (AsmString.size() == 0)
return FILTER_STRONG;
// Filter out instructions with subreg operands.
if (AsmString.find("subreg") != AsmString.npos)
return FILTER_STRONG;
/////////////////
// FILTER_WEAK
//
// Filter out instructions with a LOCK prefix;
// prefer forms that do not have the prefix
if (HasLockPrefix)
return FILTER_WEAK;
// Filter out artificial instructions
if (Name.find("TAILJMP") != Name.npos ||
Name.find("_Int") != Name.npos ||
Name.find("_int") != Name.npos ||
Name.find("Int_") != Name.npos ||
Name.find("_NOREX") != Name.npos ||
Name.find("_TC") != Name.npos ||
Name.find("EH_RETURN") != Name.npos ||
Name.find("V_SET") != Name.npos ||
Name.find("LOCK_") != Name.npos ||
Name.find("WIN") != Name.npos)
return FILTER_STRONG;
// Filter out alternate forms of AVX instructions
if (Name.find("_alt") != Name.npos ||
Name.find("XrYr") != Name.npos ||
Name.find("r64r") != Name.npos ||
Name.find("_64mr") != Name.npos ||
Name.find("Xrr") != Name.npos ||
Name.find("rr64") != Name.npos)
return FILTER_WEAK;
if (Name == "VMASKMOVDQU64" ||
Name == "VEXTRACTPSrr64" ||
Name == "VMOVQd64rr" ||
Name == "VMOVQs64rr")
return FILTER_WEAK;
// Special cases.
@ -339,6 +416,7 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
Name == "PUSH32i16" ||
Name == "PUSH64i16" ||
Name == "MOVPQI2QImr" ||
Name == "VMOVPQI2QImr" ||
Name == "MOVSDmr" ||
Name == "MOVSDrm" ||
Name == "MOVSSmr" ||
@ -349,22 +427,6 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
Name == "CRC32r16")
return FILTER_WEAK;
// Filter out instructions with segment override prefixes.
// They're too messy to handle now and we'll special case them if needed.
if (SegOvr)
return FILTER_STRONG;
// Filter out instructions that can't be printed.
if (AsmString.size() == 0)
return FILTER_STRONG;
// Filter out instructions with subreg operands.
if (AsmString.find("subreg") != AsmString.npos)
return FILTER_STRONG;
if (HasFROperands && Name.find("MOV") != Name.npos &&
((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
(Name.find("to") != Name.npos)))
@ -372,6 +434,33 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
return FILTER_NORMAL;
}
bool RecognizableInstr::hasFROperands() const {
const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
unsigned numOperands = OperandList.size();
for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
const std::string &recName = OperandList[operandIndex].Rec->getName();
if (recName.find("FR") != recName.npos)
return true;
}
return false;
}
bool RecognizableInstr::has256BitOperands() const {
const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
unsigned numOperands = OperandList.size();
for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
const std::string &recName = OperandList[operandIndex].Rec->getName();
if (!recName.compare("VR256") || !recName.compare("f256mem")) {
return true;
}
}
return false;
}
void RecognizableInstr::handleOperand(
bool optional,
@ -395,13 +484,13 @@ void RecognizableInstr::handleOperand(
}
const std::string &typeName = (*Operands)[operandIndex].Rec->getName();
Spec->operands[operandIndex].encoding = encodingFromString(typeName,
HasOpSizePrefix);
Spec->operands[operandIndex].type = typeFromString(typeName,
IsSSE,
HasREX_WPrefix,
HasOpSizePrefix);
IsSSE,
HasREX_WPrefix,
HasOpSizePrefix);
++operandIndex;
++physicalOperandIndex;
@ -530,31 +619,45 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
case X86Local::MRMSrcReg:
// Operand 1 is a register operand in the Reg/Opcode field.
// Operand 2 is a register operand in the R/M field.
// - In AVX, there is a register operand in the VEX.vvvv field here -
// Operand 3 (optional) is an immediate.
assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMSrcRegFrm");
HANDLE_OPERAND(roRegister)
HANDLE_OPERAND(rmRegister)
if (HasVEX_4VPrefix)
assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 &&
"Unexpected number of operands for MRMSrcRegFrm with VEX_4V");
else
assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMSrcRegFrm");
HANDLE_OPERAND(roRegister)
if (HasVEX_4VPrefix)
// FIXME: In AVX, the register below becomes the one encoded
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPTIONAL(rmRegister)
else
HANDLE_OPTIONAL(immediate)
HANDLE_OPERAND(vvvvRegister)
HANDLE_OPERAND(rmRegister)
HANDLE_OPTIONAL(immediate)
break;
case X86Local::MRMSrcMem:
// Operand 1 is a register operand in the Reg/Opcode field.
// Operand 2 is a memory operand (possibly SIB-extended)
// - In AVX, there is a register operand in the VEX.vvvv field here -
// Operand 3 (optional) is an immediate.
assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMSrcMemFrm");
if (HasVEX_4VPrefix)
assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 &&
"Unexpected number of operands for MRMSrcMemFrm with VEX_4V");
else
assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMSrcMemFrm");
HANDLE_OPERAND(roRegister)
if (HasVEX_4VPrefix)
// FIXME: In AVX, the register below becomes the one encoded
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPTIONAL(rmRegister)
HANDLE_OPERAND(vvvvRegister)
HANDLE_OPERAND(memory)
HANDLE_OPTIONAL(immediate)
@ -569,8 +672,14 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
case X86Local::MRM7r:
// Operand 1 is a register operand in the R/M field.
// Operand 2 (optional) is an immediate or relocation.
assert(numPhysicalOperands <= 2 &&
"Unexpected number of operands for MRMnRFrm");
if (HasVEX_4VPrefix)
assert(numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMSrcMemFrm with VEX_4V");
else
assert(numPhysicalOperands <= 2 &&
"Unexpected number of operands for MRMnRFrm");
if (HasVEX_4VPrefix)
HANDLE_OPERAND(vvvvRegister);
HANDLE_OPTIONAL(rmRegister)
HANDLE_OPTIONAL(relocation)
break;
@ -854,6 +963,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("ssmem", TYPE_M32FP)
TYPE("RST", TYPE_ST)
TYPE("i128mem", TYPE_M128)
TYPE("i256mem", TYPE_M256)
TYPE("i64i32imm_pcrel", TYPE_REL64)
TYPE("i16imm_pcrel", TYPE_REL16)
TYPE("i32imm_pcrel", TYPE_REL32)
@ -878,6 +988,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("offset16", TYPE_MOFFS16)
TYPE("offset32", TYPE_MOFFS32)
TYPE("offset64", TYPE_MOFFS64)
TYPE("VR256", TYPE_XMM256)
errs() << "Unhandled type string " << s << "\n";
llvm_unreachable("Unhandled type string");
}
@ -900,6 +1011,10 @@ OperandEncoding RecognizableInstr::immediateEncodingFromString
ENCODING("i64i32imm", ENCODING_ID)
ENCODING("i64i8imm", ENCODING_IB)
ENCODING("i8imm", ENCODING_IB)
// This is not a typo. Instructions like BLENDVPD put
// register IDs in 8-bit immediates nowadays.
ENCODING("VR256", ENCODING_IB)
ENCODING("VR128", ENCODING_IB)
errs() << "Unhandled immediate encoding " << s << "\n";
llvm_unreachable("Unhandled immediate encoding");
}
@ -915,6 +1030,7 @@ OperandEncoding RecognizableInstr::rmRegisterEncodingFromString
ENCODING("FR64", ENCODING_RM)
ENCODING("FR32", ENCODING_RM)
ENCODING("VR64", ENCODING_RM)
ENCODING("VR256", ENCODING_RM)
errs() << "Unhandled R/M register encoding " << s << "\n";
llvm_unreachable("Unhandled R/M register encoding");
}
@ -933,10 +1049,22 @@ OperandEncoding RecognizableInstr::roRegisterEncodingFromString
ENCODING("SEGMENT_REG", ENCODING_REG)
ENCODING("DEBUG_REG", ENCODING_REG)
ENCODING("CONTROL_REG", ENCODING_REG)
ENCODING("VR256", ENCODING_REG)
errs() << "Unhandled reg/opcode register encoding " << s << "\n";
llvm_unreachable("Unhandled reg/opcode register encoding");
}
/// vvvvRegisterEncodingFromString - Translates the name of a register operand
/// type into the encoding to use when that operand is carried in the VEX.vvvv
/// field.  The names listed below are "FR32", "FR64", "VR128", and "VR256",
/// each paired with ENCODING_VVVV.
///
/// @param s               - The operand type name from the LLVM tables.
/// @param hasOpSizePrefix - Not referenced directly in this body.
/// @return                - NOTE(review): presumably ENCODING_VVVV for a
///                          listed name; ENCODING is defined outside this
///                          view, so confirm against its definition.  If
///                          control reaches the end, an error is printed and
///                          llvm_unreachable is invoked.
OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString
(const std::string &s,
bool hasOpSizePrefix) {
ENCODING("FR32", ENCODING_VVVV)
ENCODING("FR64", ENCODING_VVVV)
ENCODING("VR128", ENCODING_VVVV)
ENCODING("VR256", ENCODING_VVVV)
// Reached only if none of the ENCODING lines above handled s.
errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
llvm_unreachable("Unhandled VEX.vvvv register encoding");
}
OperandEncoding RecognizableInstr::memoryEncodingFromString
(const std::string &s,
bool hasOpSizePrefix) {
@ -951,6 +1079,7 @@ OperandEncoding RecognizableInstr::memoryEncodingFromString
ENCODING("f64mem", ENCODING_RM)
ENCODING("f32mem", ENCODING_RM)
ENCODING("i128mem", ENCODING_RM)
ENCODING("i256mem", ENCODING_RM)
ENCODING("f80mem", ENCODING_RM)
ENCODING("lea32mem", ENCODING_RM)
ENCODING("lea64_32mem", ENCODING_RM)

View File

@ -52,8 +52,14 @@ private:
bool HasOpSizePrefix;
/// The hasREX_WPrefix field from the record
bool HasREX_WPrefix;
/// The hasVEXPrefix field from the record
bool HasVEXPrefix;
/// The hasVEX_4VPrefix field from the record
bool HasVEX_4VPrefix;
/// The hasVEX_WPrefix field from the record
bool HasVEX_WPrefix;
/// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
bool HasVEX_LPrefix;
/// The hasLockPrefix field from the record
bool HasLockPrefix;
/// The isCodeGenOnly field from the record
@ -96,7 +102,7 @@ private:
// error if it conflicts with any other FILTER_NORMAL
// instruction
};
/// filter - Determines whether the instruction should be decodable. Some
/// instructions are pure intrinsics and use unencodable operands; many
/// synthetic instructions are duplicates of other instructions; other
@ -106,6 +112,12 @@ private:
///
/// @return - The degree of filtering to be applied (see filter_ret).
filter_ret filter() const;
/// hasFROperands - Returns true if any operand is a FR operand.
bool hasFROperands() const;
/// has256BitOperands - Returns true if any operand is a 256-bit SSE operand.
bool has256BitOperands() const;
/// typeFromString - Translates an operand type from the string provided in
/// the LLVM tables to an OperandType for use in the operand specifier.
@ -155,6 +167,8 @@ private:
bool hasOpSizePrefix);
static OperandEncoding opcodeModifierEncodingFromString(const std::string &s,
bool hasOpSizePrefix);
static OperandEncoding vvvvRegisterEncodingFromString(const std::string &s,
bool HasOpSizePrefix);
/// handleOperand - Converts a single operand from the LLVM table format to
/// the emitted table format, handling any duplicate operands it encounters