llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp


//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
using namespace llvm;
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
return true;
}
return false;
}
namespace {
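// Operator precedence table, indexed by InfixCalculatorTok. A higher value
// binds more tightly; IC_IMM and IC_REGISTER are operands, not operators.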
static const char OpPrecedence[] = {
0, // IC_OR
1, // IC_XOR
2, // IC_AND
3, // IC_LSHIFT
3, // IC_RSHIFT
4, // IC_PLUS
4, // IC_MINUS
5, // IC_MULTIPLY
5, // IC_DIVIDE
5, // IC_MOD
6, // IC_NOT
7, // IC_NEG
8, // IC_RPAREN
9, // IC_LPAREN
0, // IC_IMM
0 // IC_REGISTER
};
class X86AsmParser : public MCTargetAsmParser {
ParseInstructionInfo *InstInfo;
bool Code16GCC;
enum VEXEncoding {
VEXEncoding_Default,
VEXEncoding_VEX2,
VEXEncoding_VEX3,
VEXEncoding_EVEX,
};
VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
private:
SMLoc consumeToken() {
MCAsmParser &Parser = getParser();
SMLoc Result = Parser.getTok().getLoc();
Parser.Lex();
return Result;
}
X86TargetStreamer &getTargetStreamer() {
assert(getParser().getStreamer().getTargetStreamer() &&
"do not have a target streamer");
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<X86TargetStreamer &>(TS);
}
unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
bool matchingInlineAsm, unsigned VariantID = 0) {
// In Code16GCC mode, match as 32-bit.
if (Code16GCC)
SwitchMode(X86::Mode32Bit);
unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MissingFeatures, matchingInlineAsm,
VariantID);
if (Code16GCC)
SwitchMode(X86::Mode16Bit);
return rv;
}
enum InfixCalculatorTok {
IC_OR = 0,
IC_XOR,
IC_AND,
IC_LSHIFT,
IC_RSHIFT,
IC_PLUS,
IC_MINUS,
IC_MULTIPLY,
IC_DIVIDE,
IC_MOD,
IC_NOT,
IC_NEG,
IC_RPAREN,
IC_LPAREN,
IC_IMM,
IC_REGISTER
};
enum IntelOperatorKind {
IOK_INVALID = 0,
IOK_LENGTH,
IOK_SIZE,
IOK_TYPE,
IOK_OFFSET
};
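// Evaluates Intel-syntax arithmetic expressions using the classic two-stack
// shunting-yard algorithm: operators are reordered onto a postfix stack as
// they arrive, and the resulting RPN is evaluated in execute().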
class InfixCalculator {
typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
SmallVector<ICToken, 4> PostfixStack;
bool isUnaryOperator(const InfixCalculatorTok Op) {
return Op == IC_NEG || Op == IC_NOT;
}
public:
int64_t popOperand() {
assert (!PostfixStack.empty() && "Popped an empty stack!");
ICToken Op = PostfixStack.pop_back_val();
if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
return -1; // The invalid Scale value will be caught later by checkScale
return Op.second;
}
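// Push an operand (immediate or register value) straight onto the postfix
// output stack.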
void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
assert ((Op == IC_IMM || Op == IC_REGISTER) &&
"Unexpected operand!");
PostfixStack.push_back(std::make_pair(Op, Val));
}
void popOperator() { InfixOperatorStack.pop_back(); }
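// Push an operator, first flushing any stacked operators of equal or higher
// precedence to the postfix stack (the shunting-yard reordering step).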
void pushOperator(InfixCalculatorTok Op) {
// Push the new operator if the stack is empty.
if (InfixOperatorStack.empty()) {
InfixOperatorStack.push_back(Op);
return;
}
// Push the new operator if it has a higher precedence than the operator
// on the top of the stack, or if the operator on the top of the stack is
// a left parenthesis.
unsigned Idx = InfixOperatorStack.size() - 1;
InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
InfixOperatorStack.push_back(Op);
return;
}
// The operator on the top of the stack has higher precedence than the
// new operator.
unsigned ParenCount = 0;
while (1) {
// Nothing to process.
if (InfixOperatorStack.empty())
break;
Idx = InfixOperatorStack.size() - 1;
StackOp = InfixOperatorStack[Idx];
if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
break;
// If we have no outstanding parentheses and we see a left parenthesis,
// then stop processing.
if (!ParenCount && StackOp == IC_LPAREN)
break;
if (StackOp == IC_RPAREN) {
++ParenCount;
InfixOperatorStack.pop_back();
} else if (StackOp == IC_LPAREN) {
--ParenCount;
InfixOperatorStack.pop_back();
} else {
InfixOperatorStack.pop_back();
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
}
// Push the new operator.
InfixOperatorStack.push_back(Op);
}
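// Flush any remaining operators and evaluate the postfix expression,
// returning the computed value.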
int64_t execute() {
// Push any remaining operators onto the postfix stack.
while (!InfixOperatorStack.empty()) {
InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
if (PostfixStack.empty())
return 0;
SmallVector<ICToken, 16> OperandStack;
for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
ICToken Op = PostfixStack[i];
if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
OperandStack.push_back(Op);
} else if (isUnaryOperator(Op.first)) {
assert (OperandStack.size() > 0 && "Too few operands.");
ICToken Operand = OperandStack.pop_back_val();
assert (Operand.first == IC_IMM &&
"Unary operation with a register!");
switch (Op.first) {
default:
report_fatal_error("Unexpected operator!");
break;
case IC_NEG:
OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
break;
case IC_NOT:
OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
break;
}
} else {
assert (OperandStack.size() > 1 && "Too few operands.");
int64_t Val;
ICToken Op2 = OperandStack.pop_back_val();
ICToken Op1 = OperandStack.pop_back_val();
switch (Op.first) {
default:
report_fatal_error("Unexpected operator!");
break;
case IC_PLUS:
Val = Op1.second + Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_MINUS:
Val = Op1.second - Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_MULTIPLY:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Multiply operation with an immediate and a register!");
Val = Op1.second * Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_DIVIDE:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Divide operation with an immediate and a register!");
assert (Op2.second != 0 && "Division by zero!");
Val = Op1.second / Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_MOD:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Modulo operation with an immediate and a register!");
Val = Op1.second % Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_OR:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Or operation with an immediate and a register!");
Val = Op1.second | Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_XOR:
assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Xor operation with an immediate and a register!");
Val = Op1.second ^ Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_AND:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"And operation with an immediate and a register!");
Val = Op1.second & Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_LSHIFT:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Left shift operation with an immediate and a register!");
Val = Op1.second << Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
case IC_RSHIFT:
assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
"Right shift operation with an immediate and a register!");
Val = Op1.second >> Op2.second;
OperandStack.push_back(std::make_pair(IC_IMM, Val));
break;
}
}
}
assert (OperandStack.size() == 1 && "Expected a single result.");
return OperandStack.pop_back_val().second;
}
};
enum IntelExprState {
IES_INIT,
IES_OR,
IES_XOR,
IES_AND,
IES_LSHIFT,
IES_RSHIFT,
IES_PLUS,
IES_MINUS,
IES_NOT,
IES_MULTIPLY,
IES_DIVIDE,
IES_MOD,
IES_LBRAC,
IES_RBRAC,
IES_LPAREN,
IES_RPAREN,
IES_REGISTER,
IES_INTEGER,
IES_IDENTIFIER,
IES_ERROR
};
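// Token-driven state machine for parsing an Intel-syntax memory or immediate
// expression. It accumulates the base register, index register, scale, and
// displacement, records at most one symbol reference, and folds all other
// arithmetic through an InfixCalculator.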
class IntelExprStateMachine {
IntelExprState State, PrevState;
unsigned BaseReg, IndexReg, TmpReg, Scale;
int64_t Imm;
const MCExpr *Sym;
StringRef SymName;
InfixCalculator IC;
InlineAsmIdentifierInfo Info;
short BracCount;
bool MemExpr;
public:
IntelExprStateMachine()
: State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
MemExpr(false) {}
void addImm(int64_t imm) { Imm += imm; }
short getBracCount() { return BracCount; }
bool isMemExpr() { return MemExpr; }
unsigned getBaseReg() { return BaseReg; }
unsigned getIndexReg() { return IndexReg; }
unsigned getScale() { return Scale; }
const MCExpr *getSym() { return Sym; }
StringRef getSymName() { return SymName; }
int64_t getImm() { return Imm + IC.execute(); }
bool isValidEndState() {
return State == IES_RBRAC || State == IES_INTEGER;
}
bool hadError() { return State == IES_ERROR; }
InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
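// Each on*() handler below consumes one token kind: it checks that the token
// is legal in the current state, then either advances the machine (updating
// the calculator and the address components) or enters IES_ERROR.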
void onOr() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
State = IES_OR;
IC.pushOperator(IC_OR);
break;
}
PrevState = CurrState;
}
void onXor() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
State = IES_XOR;
IC.pushOperator(IC_XOR);
break;
}
PrevState = CurrState;
}
void onAnd() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
State = IES_AND;
IC.pushOperator(IC_AND);
break;
}
PrevState = CurrState;
}
void onLShift() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
State = IES_LSHIFT;
IC.pushOperator(IC_LSHIFT);
break;
}
PrevState = CurrState;
}
void onRShift() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
State = IES_RSHIFT;
IC.pushOperator(IC_RSHIFT);
break;
}
PrevState = CurrState;
}
bool onPlus(StringRef &ErrMsg) {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
case IES_REGISTER:
State = IES_PLUS;
IC.pushOperator(IC_PLUS);
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// no explicit scale.
if (!BaseReg) {
BaseReg = TmpReg;
} else {
if (IndexReg) {
ErrMsg = "BaseReg/IndexReg already set!";
return true;
}
IndexReg = TmpReg;
Scale = 0;
}
}
break;
}
PrevState = CurrState;
return false;
}
bool onMinus(StringRef &ErrMsg) {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_OR:
case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_PLUS:
case IES_NOT:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_MOD:
case IES_LPAREN:
case IES_RPAREN:
case IES_LBRAC:
case IES_RBRAC:
case IES_INTEGER:
case IES_REGISTER:
case IES_INIT:
State = IES_MINUS;
// push minus operator if it is not a negate operator
if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
CurrState == IES_INTEGER || CurrState == IES_RBRAC)
IC.pushOperator(IC_MINUS);
else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
// A unary minus applied to the Scale is illegal.
ErrMsg = "Scale can't be negative";
return true;
} else
IC.pushOperator(IC_NEG);
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// no explicit scale.
if (!BaseReg) {
BaseReg = TmpReg;
} else {
if (IndexReg) {
ErrMsg = "BaseReg/IndexReg already set!";
return true;
}
IndexReg = TmpReg;
Scale = 0;
}
}
break;
}
PrevState = CurrState;
return false;
}
void onNot() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_OR:
case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_PLUS:
case IES_MINUS:
case IES_NOT:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_MOD:
case IES_LPAREN:
case IES_LBRAC:
case IES_INIT:
State = IES_NOT;
IC.pushOperator(IC_NOT);
break;
}
PrevState = CurrState;
}
bool onRegister(unsigned Reg, StringRef &ErrMsg) {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_LPAREN:
case IES_LBRAC:
State = IES_REGISTER;
TmpReg = Reg;
IC.pushOperand(IC_REGISTER);
break;
case IES_MULTIPLY:
// Index Register - Scale * Register
if (PrevState == IES_INTEGER) {
if (IndexReg) {
ErrMsg = "BaseReg/IndexReg already set!";
return true;
}
State = IES_REGISTER;
IndexReg = Reg;
// Get the scale and replace the 'Scale * Register' with '0'.
Scale = IC.popOperand();
if (checkScale(Scale, ErrMsg))
return true;
IC.pushOperand(IC_IMM);
IC.popOperator();
} else {
State = IES_ERROR;
}
break;
}
PrevState = CurrState;
return false;
}
bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
const InlineAsmIdentifierInfo &IDInfo,
bool ParsingInlineAsm, StringRef &ErrMsg) {
// InlineAsm: Treat an enum value as an integer
if (ParsingInlineAsm)
if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
// Treat a symbolic constant like an integer
if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
return onInteger(CE->getValue(), ErrMsg);
PrevState = State;
bool HasSymbol = Sym != nullptr;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_MINUS:
case IES_NOT:
case IES_INIT:
case IES_LBRAC:
MemExpr = true;
State = IES_INTEGER;
Sym = SymRef;
SymName = SymRefName;
IC.pushOperand(IC_IMM);
if (ParsingInlineAsm)
Info = IDInfo;
break;
}
if (HasSymbol)
ErrMsg = "cannot use more than one symbol in memory operand";
return HasSymbol;
}
bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_MINUS:
case IES_NOT:
case IES_OR:
case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_DIVIDE:
case IES_MOD:
case IES_MULTIPLY:
case IES_LPAREN:
case IES_INIT:
case IES_LBRAC:
State = IES_INTEGER;
if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
// Index Register - Register * Scale
if (IndexReg) {
ErrMsg = "BaseReg/IndexReg already set!";
return true;
}
IndexReg = TmpReg;
Scale = TmpInt;
if (checkScale(Scale, ErrMsg))
return true;
// Get the scale and replace the 'Register * Scale' with '0'.
IC.popOperator();
} else {
IC.pushOperand(IC_IMM, TmpInt);
}
break;
}
PrevState = CurrState;
return false;
}
void onStar() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_REGISTER:
case IES_RPAREN:
State = IES_MULTIPLY;
IC.pushOperator(IC_MULTIPLY);
break;
}
}
void onDivide() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
State = IES_DIVIDE;
IC.pushOperator(IC_DIVIDE);
break;
}
}
void onMod() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_RPAREN:
State = IES_MOD;
IC.pushOperator(IC_MOD);
break;
}
}
bool onLBrac() {
if (BracCount)
return true;
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_RBRAC:
case IES_INTEGER:
case IES_RPAREN:
State = IES_PLUS;
IC.pushOperator(IC_PLUS);
break;
case IES_INIT:
assert(!BracCount && "BracCount should be zero on parsing's start");
State = IES_LBRAC;
break;
}
MemExpr = true;
BracCount++;
return false;
}
bool onRBrac() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_REGISTER:
case IES_RPAREN:
if (BracCount-- != 1)
return true;
State = IES_RBRAC;
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
// no explicit scale.
if (!BaseReg) {
BaseReg = TmpReg;
} else {
assert (!IndexReg && "BaseReg/IndexReg already set!");
IndexReg = TmpReg;
Scale = 0;
}
}
break;
}
PrevState = CurrState;
return false;
}
void onLParen() {
IntelExprState CurrState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_PLUS:
case IES_MINUS:
case IES_NOT:
case IES_OR:
case IES_XOR:
case IES_AND:
case IES_LSHIFT:
case IES_RSHIFT:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_MOD:
case IES_LPAREN:
case IES_INIT:
case IES_LBRAC:
State = IES_LPAREN;
IC.pushOperator(IC_LPAREN);
break;
}
PrevState = CurrState;
}
void onRParen() {
PrevState = State;
switch (State) {
default:
State = IES_ERROR;
break;
case IES_INTEGER:
case IES_REGISTER:
case IES_RPAREN:
State = IES_RPAREN;
IC.pushOperator(IC_RPAREN);
break;
}
}
};
bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
bool MatchingInlineAsm = false) {
MCAsmParser &Parser = getParser();
if (MatchingInlineAsm) {
if (!getLexer().isAtStartOfStatement())
Parser.eatToEndOfStatement();
return false;
}
return Parser.Error(L, Msg, Range);
}
std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
Error(Loc, Msg, R);
return nullptr;
}
std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
bool IsSIReg(unsigned Reg);
unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
void
AddDefaultSrcDestOperands(OperandVector &Operands,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
OperandVector &FinalOperands);
std::unique_ptr<X86Operand> ParseOperand();
std::unique_ptr<X86Operand> ParseATTOperand();
std::unique_ptr<X86Operand> ParseIntelOperand();
std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
SMLoc End);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
const MCExpr *&Disp,
const SMLoc &StartLoc,
SMLoc &EndLoc);
X86::CondCode ParseConditionCode(StringRef CCode);
bool ParseIntelMemoryOperandSize(unsigned &Size);
std::unique_ptr<X86Operand>
CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
unsigned IndexReg, unsigned Scale, SMLoc Start,
SMLoc End, unsigned Size, StringRef Identifier,
const InlineAsmIdentifierInfo &Info);
bool parseDirectiveEven(SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
/// CodeView FPO data directives.
bool parseDirectiveFPOProc(SMLoc L);
bool parseDirectiveFPOSetFrame(SMLoc L);
bool parseDirectiveFPOPushReg(SMLoc L);
bool parseDirectiveFPOStackAlloc(SMLoc L);
bool parseDirectiveFPOStackAlign(SMLoc L);
bool parseDirectiveFPOEndPrologue(SMLoc L);
bool parseDirectiveFPOEndProc(SMLoc L);
bool parseDirectiveFPOData(SMLoc L);
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
bool processInstruction(MCInst &Inst, const OperandVector &Ops);
/// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
/// instrumentation around Inst.
void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
MCStreamer &Out, bool MatchingInlineAsm);
bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
bool MatchingInlineAsm);
bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm);
bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm);
bool OmitRegisterFromClobberLists(unsigned RegNo) override;
/// Parses AVX-512 specific operand primitives: masked registers ({%k<NUM>}, {z})
/// and memory broadcasting ({1to<NUM>}), updating the Operands vector if required.
/// Returns false if no parsing errors occurred, true otherwise.
bool HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op);
bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
return getSTI().getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode() const {
// FIXME: Can tablegen auto-generate this?
return getSTI().getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode() const {
// FIXME: Can tablegen auto-generate this?
return getSTI().getFeatureBits()[X86::Mode16Bit];
}
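// Switch the subtarget to exactly one of 16/32/64-bit mode by toggling the
// corresponding mode feature bits and recomputing the available features.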
void SwitchMode(unsigned mode) {
MCSubtargetInfo &STI = copySTI();
FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
FeatureBitset FB = ComputeAvailableFeatures(
STI.ToggleFeature(OldMode.flip(mode)));
setAvailableFeatures(FB);
assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
}
unsigned getPointerWidth() {
if (is16BitMode()) return 16;
if (is32BitMode()) return 32;
if (is64BitMode()) return 64;
llvm_unreachable("invalid mode");
}
bool isParsingIntelSyntax() {
return getParser().getAssemblerDialect();
}
/// @name Auto-generated Matcher Functions
/// {
#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"
/// }
public:
enum X86MatchResultTy {
Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "X86GenAsmMatcher.inc"
};
X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
Code16GCC(false) {
Parser.addAliasForDirective(".word", ".2byte");
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
};
} // end anonymous namespace
/// @name Auto-generated Match Functions
/// {
static unsigned MatchRegisterName(StringRef Name);
/// }
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
unsigned Scale, bool Is64BitMode,
StringRef &ErrMsg) {
// If we have both a base register and an index register make sure they are
// both 64-bit or 32-bit registers.
// To support VSIB, IndexReg can be 128-bit or 256-bit registers.
if (BaseReg != 0 &&
!(BaseReg == X86::RIP || BaseReg == X86::EIP ||
X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
ErrMsg = "invalid base+index expression";
return true;
}
if (IndexReg != 0 &&
!(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
ErrMsg = "invalid base+index expression";
return true;
}
if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
IndexReg == X86::EIP || IndexReg == X86::RIP ||
IndexReg == X86::ESP || IndexReg == X86::RSP) {
ErrMsg = "invalid base+index expression";
return true;
}
// Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
// and then only in non-64-bit modes.
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
(Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
BaseReg != X86::SI && BaseReg != X86::DI))) {
ErrMsg = "invalid 16-bit base register";
return true;
}
if (BaseReg == 0 &&
X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
ErrMsg = "16-bit memory operand may not include only index register";
return true;
}
if (BaseReg != 0 && IndexReg != 0) {
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
(X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
IndexReg == X86::EIZ)) {
ErrMsg = "base register is 64-bit, but index register is not";
return true;
}
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
(X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
IndexReg == X86::RIZ)) {
ErrMsg = "base register is 32-bit, but index register is not";
return true;
}
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
ErrMsg = "base register is 16-bit, but index register is not";
return true;
}
if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
(IndexReg != X86::SI && IndexReg != X86::DI)) {
ErrMsg = "invalid 16-bit base/index register combination";
return true;
}
}
}
// RIP/EIP-relative addressing is only supported in 64-bit mode.
if (!Is64BitMode && BaseReg != 0 &&
(BaseReg == X86::RIP || BaseReg == X86::EIP)) {
ErrMsg = "IP-relative addressing requires 64-bit mode";
return true;
}
return checkScale(Scale, ErrMsg);
}
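// Parse a register name, tolerating an optional '%' prefix. Handles the
// multi-token "%st(i)" form and the "db0"-"db15" aliases for dr0-dr15, and
// rejects registers that are unavailable outside 64-bit mode.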
bool X86AsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
RegNo = 0;
const AsmToken &PercentTok = Parser.getTok();
StartLoc = PercentTok.getLoc();
// If we encounter a %, ignore it. This code handles registers with and
// without the prefix, unprefixed registers can occur in cfi directives.
if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
Parser.Lex(); // Eat percent token.
const AsmToken &Tok = Parser.getTok();
EndLoc = Tok.getEndLoc();
if (Tok.isNot(AsmToken::Identifier)) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
SMRange(StartLoc, EndLoc));
}
RegNo = MatchRegisterName(Tok.getString());
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
RegNo = MatchRegisterName(Tok.getString().lower());
// The "flags" register cannot be referenced directly.
// Treat it as an identifier instead.
if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
RegNo = 0;
if (!is64BitMode()) {
// FIXME: This should be done using Requires<Not64BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
// checked.
// FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// REX prefix.
if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo)) {
StringRef RegName = Tok.getString();
Parser.Lex(); // Eat register name.
return Error(StartLoc,
"register %" + RegName + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
}
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == X86::ST0) {
Parser.Lex(); // Eat 'st'
// Check to see if we have '(4)' after %st.
if (getLexer().isNot(AsmToken::LParen))
return false;
// Lex the paren.
getParser().Lex();
const AsmToken &IntTok = Parser.getTok();
if (IntTok.isNot(AsmToken::Integer))
return Error(IntTok.getLoc(), "expected stack index");
switch (IntTok.getIntVal()) {
case 0: RegNo = X86::ST0; break;
case 1: RegNo = X86::ST1; break;
case 2: RegNo = X86::ST2; break;
case 3: RegNo = X86::ST3; break;
case 4: RegNo = X86::ST4; break;
case 5: RegNo = X86::ST5; break;
case 6: RegNo = X86::ST6; break;
case 7: RegNo = X86::ST7; break;
default: return Error(IntTok.getLoc(), "invalid stack index");
}
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat ')'
return false;
}
EndLoc = Parser.getTok().getEndLoc();
// If this is "db[0-15]", match it as an alias
// for dr[0-15].
if (RegNo == 0 && Tok.getString().startswith("db")) {
if (Tok.getString().size() == 3) {
switch (Tok.getString()[2]) {
case '0': RegNo = X86::DR0; break;
case '1': RegNo = X86::DR1; break;
case '2': RegNo = X86::DR2; break;
case '3': RegNo = X86::DR3; break;
case '4': RegNo = X86::DR4; break;
case '5': RegNo = X86::DR5; break;
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
case '8': RegNo = X86::DR8; break;
case '9': RegNo = X86::DR9; break;
}
} else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
switch (Tok.getString()[3]) {
case '0': RegNo = X86::DR10; break;
case '1': RegNo = X86::DR11; break;
case '2': RegNo = X86::DR12; break;
case '3': RegNo = X86::DR13; break;
case '4': RegNo = X86::DR14; break;
case '5': RegNo = X86::DR15; break;
}
}
if (RegNo != 0) {
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat it.
return false;
}
}
if (RegNo == 0) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
SMRange(StartLoc, EndLoc));
}
Parser.Lex(); // Eat identifier token.
return false;
}
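// Build the implicit (R|E)SI-based source memory operand used by string
// instructions, sized according to the current parsing mode.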
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
bool Parse32 = is32BitMode() || Code16GCC;
unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
/*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
}
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
bool Parse32 = is32BitMode() || Code16GCC;
unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
/*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
}
bool X86AsmParser::IsSIReg(unsigned Reg) {
switch (Reg) {
default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
case X86::RSI:
case X86::ESI:
case X86::SI:
return true;
case X86::RDI:
case X86::EDI:
case X86::DI:
return false;
}
}
unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
bool IsSIReg) {
switch (RegClassID) {
default: llvm_unreachable("Unexpected register class");
case X86::GR64RegClassID:
return IsSIReg ? X86::RSI : X86::RDI;
case X86::GR32RegClassID:
return IsSIReg ? X86::ESI : X86::EDI;
case X86::GR16RegClassID:
return IsSIReg ? X86::SI : X86::DI;
}
}
void X86AsmParser::AddDefaultSrcDestOperands(
OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
if (isParsingIntelSyntax()) {
Operands.push_back(std::move(Dst));
Operands.push_back(std::move(Src));
}
else {
Operands.push_back(std::move(Src));
Operands.push_back(std::move(Dst));
}
}
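// For string instructions, reconcile the operands the user wrote with the
// canonical implicit SI/DI forms: verify the register classes are consistent,
// warn when an explicit memory operand only contributed its size, and replace
// the original operands with the adjusted defaults.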
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
OperandVector &FinalOperands) {
if (OrigOperands.size() > 1) {
// Check if sizes match, OrigOperands also contains the instruction name
assert(OrigOperands.size() == FinalOperands.size() + 1 &&
"Operand size mismatch");
SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
// Verify types match
int RegClassID = -1;
for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
if (FinalOp.isReg() &&
(!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
// Return false and let a normal complaint about bogus operands happen
return false;
if (FinalOp.isMem()) {
if (!OrigOp.isMem())
// Return false and let a normal complaint about bogus operands happen
return false;
unsigned OrigReg = OrigOp.Mem.BaseReg;
unsigned FinalReg = FinalOp.Mem.BaseReg;
// If we've already encountered a register class, make sure all register
// bases are of the same register class.
if (RegClassID != -1 &&
!X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
return Error(OrigOp.getStartLoc(),
"mismatching source and destination index registers");
}
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
RegClassID = X86::GR64RegClassID;
else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
RegClassID = X86::GR32RegClassID;
else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
RegClassID = X86::GR16RegClassID;
else
// Unexpected register class type
// Return false and let a normal complaint about bogus operands happen
return false;
bool IsSI = IsSIReg(FinalReg);
FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
if (FinalReg != OrigReg) {
std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
Warnings.push_back(std::make_pair(
OrigOp.getStartLoc(),
"memory operand is only for determining the size, " + RegName +
" will be used for the location"));
}
FinalOp.Mem.Size = OrigOp.Mem.Size;
FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
FinalOp.Mem.BaseReg = FinalReg;
}
}
// Produce warnings only if all the operands passed the adjustment, to keep
// legal cases like "movsd (%rax), %xmm0" from mistakenly producing warnings.
for (auto &WarningMsg : Warnings) {
Warning(WarningMsg.first, WarningMsg.second);
}
// Remove old operands
for (unsigned int i = 0; i < FinalOperands.size(); ++i)
OrigOperands.pop_back();
}
// OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
for (unsigned int i = 0; i < FinalOperands.size(); ++i)
OrigOperands.push_back(std::move(FinalOperands[i]));
return false;
}
std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
if (isParsingIntelSyntax())
return ParseIntelOperand();
return ParseATTOperand();
}
std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
const InlineAsmIdentifierInfo &Info) {
// If we found a decl other than a VarDecl, then assume it is a FuncDecl or
// some other label reference.
if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
// Insert an explicit size if the user didn't have one.
if (!Size) {
Size = getPointerWidth();
InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
/*Len=*/0, Size);
}
// Create an absolute memory reference in order to match against
// instructions taking a PC relative operand.
return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
Identifier, Info.Label.Decl);
}
// We either have a direct symbol reference, or an offset from a symbol. The
// parser always puts the symbol on the LHS, so look there for size
// calculation purposes.
unsigned FrontendSize = 0;
void *Decl = nullptr;
bool IsGlobalLV = false;
if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
// Size is in terms of bits in this context.
FrontendSize = Info.Var.Type * 8;
Decl = Info.Var.Decl;
IsGlobalLV = Info.Var.IsGlobalLV;
}
// It is quite common for MS InlineAsm to use a global variable and one or two
// registers in a memory expression, even though it is inaccessible via rip/eip.
if (IsGlobalLV && (BaseReg || IndexReg)) {
return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
// Otherwise, we set the base register to a non-zero value
// if we don't know the actual value at this time. This is necessary to
// get the matching correct in some cases.
} else {
BaseReg = BaseReg ? BaseReg : 1;
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
IndexReg, Scale, Start, End, Size, Identifier,
Decl, FrontendSize);
}
}
// Some binary bitwise operators have named synonyms.
// Query a candidate string for being such a named operator
// and, if so, invoke the appropriate handler.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
// A named operator should be either lower or upper case, but not a mix
if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
return false;
if (Name.equals_lower("not"))
SM.onNot();
else if (Name.equals_lower("or"))
SM.onOr();
else if (Name.equals_lower("shl"))
SM.onLShift();
else if (Name.equals_lower("shr"))
SM.onRShift();
else if (Name.equals_lower("xor"))
SM.onXor();
else if (Name.equals_lower("and"))
SM.onAnd();
else if (Name.equals_lower("mod"))
SM.onMod();
else
return false;
return true;
}
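// Main Intel expression loop: feed each token to the state machine until
// end-of-statement, or until a token that is only legal after a complete
// expression is seen while the machine is in a valid end state.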
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
StringRef ErrMsg;
AsmToken::TokenKind PrevTK = AsmToken::Error;
bool Done = false;
while (!Done) {
bool UpdateLocLex = true;
AsmToken::TokenKind TK = getLexer().getKind();
switch (TK) {
default:
if ((Done = SM.isValidEndState()))
break;
return Error(Tok.getLoc(), "unknown token in expression");
case AsmToken::EndOfStatement:
Done = true;
break;
case AsmToken::Real:
// DotOperator: [ebx].0
UpdateLocLex = false;
if (ParseIntelDotOperator(SM, End))
return true;
break;
case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;
// Register
unsigned Reg;
if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
if (SM.onRegister(Reg, ErrMsg))
return Error(Tok.getLoc(), ErrMsg);
break;
}
// Operator synonym ("not", "or", etc.)
if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
break;
// Symbol reference, when parsing assembly content
InlineAsmIdentifierInfo Info;
const MCExpr *Val;
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End)) {
return Error(Tok.getLoc(), "Unexpected identifier!");
} else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
return Error(IdentLoc, ErrMsg);
} else
break;
}
// MS InlineAsm operators (TYPE/LENGTH/SIZE)
if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
if (OpKind == IOK_OFFSET)
return Error(IdentLoc, "OFFSET operator as part of a compound "
"immediate expression is not yet supported");
if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
if (SM.onInteger(Val, ErrMsg))
return Error(IdentLoc, ErrMsg);
} else
return true;
break;
}
// MS Dot Operator expression
if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
if (ParseIntelDotOperator(SM, End))
return true;
break;
}
// MS InlineAsm identifier
// Call parseIdentifier() to combine @ with the identifier behind it.
if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
return Error(IdentLoc, "expected identifier");
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
return true;
else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
return Error(IdentLoc, ErrMsg);
break;
}
case AsmToken::Integer: {
// Look for 'b' or 'f' following an Integer as a directional label
SMLoc Loc = getTok().getLoc();
int64_t IntVal = getTok().getIntVal();
End = consumeToken();
UpdateLocLex = false;
if (getLexer().getKind() == AsmToken::Identifier) {
StringRef IDVal = getTok().getString();
if (IDVal == "f" || IDVal == "b") {
MCSymbol *Sym =
getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Val =
MCSymbolRefExpr::create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
StringRef Identifier = Sym->getName();
InlineAsmIdentifierInfo Info;
if (SM.onIdentifierExpr(Val, Identifier, Info,
isParsingInlineAsm(), ErrMsg))
return Error(Loc, ErrMsg);
End = consumeToken();
} else {
if (SM.onInteger(IntVal, ErrMsg))
return Error(Loc, ErrMsg);
}
} else {
if (SM.onInteger(IntVal, ErrMsg))
return Error(Loc, ErrMsg);
}
break;
}
case AsmToken::Plus:
if (SM.onPlus(ErrMsg))
return Error(getTok().getLoc(), ErrMsg);
break;
case AsmToken::Minus:
if (SM.onMinus(ErrMsg))
return Error(getTok().getLoc(), ErrMsg);
break;
case AsmToken::Tilde: SM.onNot(); break;
case AsmToken::Star: SM.onStar(); break;
case AsmToken::Slash: SM.onDivide(); break;
case AsmToken::Percent: SM.onMod(); break;
case AsmToken::Pipe: SM.onOr(); break;
case AsmToken::Caret: SM.onXor(); break;
case AsmToken::Amp: SM.onAnd(); break;
case AsmToken::LessLess:
SM.onLShift(); break;
case AsmToken::GreaterGreater:
SM.onRShift(); break;
case AsmToken::LBrac:
if (SM.onLBrac())
return Error(Tok.getLoc(), "unexpected bracket encountered");
break;
case AsmToken::RBrac:
if (SM.onRBrac())
return Error(Tok.getLoc(), "unexpected bracket encountered");
break;
case AsmToken::LParen: SM.onLParen(); break;
case AsmToken::RParen: SM.onRParen(); break;
}
if (SM.hadError())
return Error(Tok.getLoc(), "unknown token in expression");
if (!Done && UpdateLocLex)
End = consumeToken();
PrevTK = TK;
}
return false;
}
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
SMLoc Start, SMLoc End) {
SMLoc Loc = Start;
unsigned ExprLen = End.getPointer() - Start.getPointer();
// Skip everything before a symbol displacement (if we have one)
if (SM.getSym()) {
StringRef SymName = SM.getSymName();
if (unsigned Len = SymName.data() - Start.getPointer())
InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
ExprLen = End.getPointer() - (SymName.data() + SymName.size());
// If we have only a symbol, then there's no need for a complex rewrite;
// simply skip everything after it.
if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
if (ExprLen)
InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
return;
}
}
// Build an Intel Expression rewrite
StringRef BaseRegStr;
StringRef IndexRegStr;
if (SM.getBaseReg())
BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
if (SM.getIndexReg())
IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
// Emit it
IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}
// Inline assembly may use variable names with namespace alias qualifiers.
bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand,
SMLoc &End) {
MCAsmParser &Parser = getParser();
assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = nullptr;
StringRef LineBuf(Identifier.data());
SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
const AsmToken &Tok = Parser.getTok();
SMLoc Loc = Tok.getLoc();
// Advance the token stream until the end of the current token is
// after the end of what the frontend claimed.
const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
do {
End = Tok.getEndLoc();
getLexer().Lex();
} while (End.getPointer() < EndPtr);
Identifier = LineBuf;
// The frontend should end parsing on an assembler token boundary, unless it
// failed parsing.
assert((End.getPointer() == EndPtr ||
Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
"frontend claimed part of a token?");
// If the identifier lookup was unsuccessful, assume that we are dealing with
// a label.
if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
StringRef InternalName =
SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
Loc, false);
assert(InternalName.size() && "We should have an internal name here.");
// Push a rewrite for replacing the identifier name with the internal name.
InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
InternalName);
} else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
return false;
// Create the symbol reference.
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
return false;
}
// ParseRoundingModeOp - Parse an AVX-512 rounding mode operand.
std::unique_ptr<X86Operand>
X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
if (Tok.isNot(AsmToken::Identifier))
return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
if (Tok.getIdentifier().startswith("r")) {
int rndMode = StringSwitch<int>(Tok.getIdentifier())
.Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
.Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
.Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
.Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
.Default(-1);
if (-1 == rndMode)
return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
Parser.Lex(); // Eat "r*" of r*-sae
if (!getLexer().is(AsmToken::Minus))
return ErrorOperand(Tok.getLoc(), "Expected - at this point");
Parser.Lex(); // Eat "-"
Parser.Lex(); // Eat the sae
if (!getLexer().is(AsmToken::RCurly))
return ErrorOperand(Tok.getLoc(), "Expected } at this point");
SMLoc End = Tok.getEndLoc();
Parser.Lex(); // Eat "}"
const MCExpr *RndModeOp =
MCConstantExpr::create(rndMode, Parser.getContext());
return X86Operand::CreateImm(RndModeOp, Start, End);
}
if (Tok.getIdentifier().equals("sae")) {
Parser.Lex(); // Eat the sae
if (!getLexer().is(AsmToken::RCurly))
return ErrorOperand(Tok.getLoc(), "Expected } at this point");
Parser.Lex(); // Eat "}"
return X86Operand::CreateToken("{sae}", consumedToken);
}
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
}
/// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
const AsmToken &Tok = getTok();
unsigned Offset;
// Drop the optional '.'.
StringRef DotDispStr = Tok.getString();
if (DotDispStr.startswith("."))
DotDispStr = DotDispStr.drop_front(1);
// .Imm gets lexed as a real.
if (Tok.is(AsmToken::Real)) {
APInt DotDisp;
DotDispStr.getAsInteger(10, DotDisp);
Offset = DotDisp.getZExtValue();
} else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
Offset))
return Error(Tok.getLoc(), "Unable to lookup field reference!");
} else
return Error(Tok.getLoc(), "Unexpected token type!");
// Eat the DotExpression and update End
End = SMLoc::getFromPointer(DotDispStr.data());
const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
while (Tok.getLoc().getPointer() < DotExprEndLoc)
Lex();
SM.addImm(Offset);
return false;
}
/// Parse the 'offset' operator. This operator is used to specify the
/// location rather than the content of a variable.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc OffsetOfLoc = Tok.getLoc();
Parser.Lex(); // Eat offset.
const MCExpr *Val;
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
/*Unevaluated=*/false, End))
return nullptr;
void *Decl = nullptr;
// FIXME: MS evaluates "offset <Constant>" to the underlying integral
if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
return ErrorOperand(Start, "offset operator cannot yet handle constants");
else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
Decl = Info.Var.Decl;
// Don't emit the offset operator.
InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
// The offset operator will have an 'r' constraint, thus we need to create
// register operand to ensure proper matching. Just pick a GPR based on
// the size of a pointer.
bool Parse32 = is32BitMode() || Code16GCC;
unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
OffsetOfLoc, Identifier, Decl);
}
// Query a candidate string for being an Intel assembly operator.
// Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
return StringSwitch<unsigned>(Name)
.Cases("TYPE","type",IOK_TYPE)
.Cases("SIZE","size",IOK_SIZE)
.Cases("LENGTH","length",IOK_LENGTH)
.Cases("OFFSET","offset",IOK_OFFSET)
.Default(IOK_INVALID);
}
/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
/// returns the number of elements in an array. It returns the value 1 for
/// non-array variables. The SIZE operator returns the size of a C or C++
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
Parser.Lex(); // Eat operator.
const MCExpr *Val = nullptr;
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
/*Unevaluated=*/true, End))
return 0;
if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
Error(Start, "unable to lookup expression");
return 0;
}
unsigned CVal = 0;
switch(OpKind) {
default: llvm_unreachable("Unexpected operand kind!");
case IOK_LENGTH: CVal = Info.Var.Length; break;
case IOK_SIZE: CVal = Info.Var.Size; break;
case IOK_TYPE: CVal = Info.Var.Type; break;
}
return CVal;
}
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
Size = StringSwitch<unsigned>(getTok().getString())
.Cases("BYTE", "byte", 8)
.Cases("WORD", "word", 16)
.Cases("DWORD", "dword", 32)
.Cases("FLOAT", "float", 32)
.Cases("LONG", "long", 32)
.Cases("FWORD", "fword", 48)
.Cases("DOUBLE", "double", 64)
.Cases("QWORD", "qword", 64)
.Cases("MMWORD","mmword", 64)
.Cases("XWORD", "xword", 80)
.Cases("TBYTE", "tbyte", 80)
.Cases("XMMWORD", "xmmword", 128)
.Cases("YMMWORD", "ymmword", 256)
.Cases("ZMMWORD", "zmmword", 512)
.Default(0);
if (Size) {
const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
Lex(); // Eat ptr.
}
return false;
}
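// Top-level Intel operand parsing: dispatch on the stand-alone OFFSET
// operator, an optional size directive ("dword ptr"), a rounding-mode
// operand, a register (possibly a segment override), and finally a full
// memory or immediate expression.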
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
// FIXME: Offset operator
// Should be handled as part of an immediate expression, like other operators.
// Currently it is only supported as a stand-alone operand.
if (isParsingInlineAsm())
if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
return ParseIntelOffsetOfOperator();
// Parse optional Size directive.
unsigned Size;
if (ParseIntelMemoryOperandSize(Size))
return nullptr;
bool PtrInOperand = bool(Size);
Start = Tok.getLoc();
// Rounding mode operand.
if (getLexer().is(AsmToken::LCurly))
return ParseRoundingModeOp(Start);
// Register operand.
unsigned RegNo = 0;
if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
if (RegNo == X86::RIP)
return ErrorOperand(Start, "rip can only be used as a base register");
// A Register followed by ':' is considered a segment override
if (Tok.isNot(AsmToken::Colon))
return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
ErrorOperand(Start, "expected memory operand after 'ptr', "
"found register operand instead");
// An alleged segment override; check that we have a valid segment register.
if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
return ErrorOperand(Start, "invalid segment register");
// Eat ':' and update Start location
Start = Lex().getLoc();
}
// Immediates and Memory
IntelExprStateMachine SM;
if (ParseIntelExpression(SM, End))
return nullptr;
if (isParsingInlineAsm())
RewriteIntelExpression(SM, Start, Tok.getLoc());
int64_t Imm = SM.getImm();
const MCExpr *Disp = SM.getSym();
const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
if (Disp && Imm)
Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
if (!Disp)
Disp = ImmDisp;
// RegNo != 0 specifies a valid segment register,
// and we are parsing a segment override
if (!SM.isMemExpr() && !RegNo)
return X86Operand::CreateImm(Disp, Start, End);
StringRef ErrMsg;
unsigned BaseReg = SM.getBaseReg();
unsigned IndexReg = SM.getIndexReg();
unsigned Scale = SM.getScale();
if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
(IndexReg == X86::ESP || IndexReg == X86::RSP))
std::swap(BaseReg, IndexReg);
// If BaseReg is a vector register and IndexReg is not, swap them unless
// Scale was specified in which case it would be an error.
if (Scale == 0 &&
!(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
(X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
std::swap(BaseReg, IndexReg);
if (Scale != 0 &&
X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
return ErrorOperand(Start, "16-bit addresses cannot have a scale");
// If there was no explicit scale specified, change it to 1.
if (Scale == 0)
Scale = 1;
// If this is a 16-bit addressing mode with the base and index in the wrong
// order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
// shared with att syntax where order matters.
if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
(IndexReg == X86::BX || IndexReg == X86::BP))
std::swap(BaseReg, IndexReg);
if ((BaseReg || IndexReg) &&
CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
ErrMsg))
return ErrorOperand(Start, ErrMsg);
if (isParsingInlineAsm())
return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
Scale, Start, End, Size, SM.getSymName(),
SM.getIdentifierInfo());
if (!(BaseReg || IndexReg || RegNo))
return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
BaseReg, IndexReg, Scale, Start, End, Size);
}
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
case AsmToken::Dollar: {
// $42 or $ID -> immediate.
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
// This is an immediate, so we should not parse a register. Do a precheck
// for '%' to supersede intra-register parse errors.
SMLoc L = Parser.getTok().getLoc();
if (check(getLexer().is(AsmToken::Percent), L,
"expected immediate expression") ||
getParser().parseExpression(Val, End) ||
check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
case AsmToken::LCurly: {
SMLoc Start = Parser.getTok().getLoc();
return ParseRoundingModeOp(Start);
}
default: {
// This is a memory operand or a register. We have some parsing complications
// as a '(' may be part of an immediate expression or the addressing mode
// block. This is complicated by the fact that an assembler-level variable
// may refer either to a register or an immediate expression.
SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
const MCExpr *Expr = nullptr;
unsigned Reg = 0;
if (getLexer().isNot(AsmToken::LParen)) {
// No '(' so this is either a displacement expression or a register.
if (Parser.parseExpression(Expr, EndLoc))
return nullptr;
if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
// Segment Register. Reset Expr and copy value to register.
Expr = nullptr;
Reg = RE->getRegNo();
// Sanity check register.
if (Reg == X86::EIZ || Reg == X86::RIZ)
return ErrorOperand(
Loc, "%eiz and %riz can only be used as index registers",
SMRange(Loc, EndLoc));
if (Reg == X86::RIP)
return ErrorOperand(Loc, "%rip can only be used as a base register",
SMRange(Loc, EndLoc));
// Return registers that are not segment prefixes immediately.
if (!Parser.parseOptionalToken(AsmToken::Colon))
return X86Operand::CreateReg(Reg, Loc, EndLoc);
if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
return ErrorOperand(Loc, "invalid segment register");
}
}
// This is a Memory operand.
return ParseMemOperand(Reg, Expr, Loc, EndLoc);
}
}
}
// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
// otherwise the EFLAGS Condition Code enumerator.
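// For example, ParseConditionCode("nae") returns X86::COND_B, making "jnae"
// equivalent to "jb".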
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
return StringSwitch<X86::CondCode>(CC)
.Case("o", X86::COND_O) // Overflow
.Case("no", X86::COND_NO) // No Overflow
.Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
.Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
.Cases("e", "z", X86::COND_E) // Equal/Zero
.Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
.Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
.Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
.Case("s", X86::COND_S) // Sign
.Case("ns", X86::COND_NS) // No Sign
.Cases("p", "pe", X86::COND_P) // Parity/Parity Even
.Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
.Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
.Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
.Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
.Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
.Default(X86::COND_INVALID);
}
// true on failure, false otherwise
// If no {z} mark was found - Parser doesn't advance
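// In assembly, {z} requests zero-masking and follows an op-mask register,
// e.g. "vmovaps %zmm0, %zmm1 {%k1} {z}".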
bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
const SMLoc &StartLoc) {
MCAsmParser &Parser = getParser();
// Assuming we are just past the '{' mark, query the next token.
// If the searched-for {z} mark is not found, return false, as no parsing
// error was encountered.
if (!(getLexer().is(AsmToken::Identifier) &&
(getLexer().getTok().getIdentifier() == "z")))
return false;
Parser.Lex(); // Eat z
// Query and eat the '}' mark
if (!getLexer().is(AsmToken::RCurly))
return Error(getLexer().getLoc(), "Expected } at this point");
Parser.Lex(); // Eat '}'
// Assign Z the {z} mark operand
Z = X86Operand::CreateToken("{z}", StartLoc);
return false;
}
// true on failure, false otherwise
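// Accepts the AVX512 operand decorations that may trail an operand, e.g.
// "(%rax){1to16}" (memory broadcasting), "{%k1}" (merge-masking) and
// "{%k1}{z}" (zero-masking).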
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op) {
MCAsmParser &Parser = getParser();
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
// Distinguish {1to<NUM>} from {%k<NUM>}.
if(getLexer().is(AsmToken::Integer)) {
// Parse memory broadcasting ({1to<NUM>}).
if (getLexer().getTok().getIntVal() != 1)
return TokError("Expected 1to<NUM> at this point");
Parser.Lex(); // Eat "1" of 1to8
if (!getLexer().is(AsmToken::Identifier) ||
!getLexer().getTok().getIdentifier().startswith("to"))
return TokError("Expected 1to<NUM> at this point");
// Recognize only reasonable suffixes.
const char *BroadcastPrimitive =
StringSwitch<const char*>(getLexer().getTok().getIdentifier())
.Case("to2", "{1to2}")
.Case("to4", "{1to4}")
.Case("to8", "{1to8}")
.Case("to16", "{1to16}")
.Default(nullptr);
if (!BroadcastPrimitive)
return TokError("Invalid memory broadcast primitive.");
Parser.Lex(); // Eat "toN" of 1toN
if (!getLexer().is(AsmToken::RCurly))
return TokError("Expected } at this point");
Parser.Lex(); // Eat "}"
Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
consumedToken));
// No AVX512-specific primitives can follow
// memory broadcasting, so return.
return false;
} else {
// Parse either {k}{z}, {z}{k}, {k} or {z}.
// The last one has no meaning, but GCC accepts it.
// Currently, we're just past a '{' mark.
std::unique_ptr<X86Operand> Z;
if (ParseZ(Z, consumedToken))
return true;
// Reaching here means that parsing the alleged '{z}' mark yielded
// no errors.
// Check whether a {%k<NUM>} mark still needs to be parsed.
if (!Z || getLexer().is(AsmToken::LCurly)) {
SMLoc StartLoc = Z ? consumeToken() : consumedToken;
// Parse an op-mask register mark ({%k<NUM>}), which is now to be
// expected
unsigned RegNo;
SMLoc RegLoc;
if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
if (RegNo == X86::K0)
return Error(RegLoc, "Register k0 can't be used as write mask");
if (!getLexer().is(AsmToken::RCurly))
return Error(getLexer().getLoc(), "Expected } at this point");
Operands.push_back(X86Operand::CreateToken("{", StartLoc));
Operands.push_back(
X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
} else
return Error(getLexer().getLoc(),
"Expected an op-mask register at this point");
// {%k<NUM>} mark is found, inquire for {z}
if (getLexer().is(AsmToken::LCurly) && !Z) {
// If we found a parsing error, or found no (expected) {z} mark,
// report an error.
if (ParseZ(Z, consumeToken()) || !Z)
return Error(getLexer().getLoc(),
"Expected a {z} mark at this point");
}
// '{z}' on its own is meaningless, hence should be ignored.
// On the contrary, had it been accompanied by a K register,
// allow it.
if (Z)
Operands.push_back(std::move(Z));
}
}
}
return false;
}
/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
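/// Handles AT&T forms such as "-4(%rbp)", "(%rax,%rcx,4)", "sym(,%rdx,8)",
/// and the special port operand "(%dx)".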
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
const MCExpr *&Disp,
const SMLoc &StartLoc,
SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
SMLoc Loc;
// Based on the initial values passed in, we are in one of these cases (with
// current position (*)):
// 1. seg : * disp (base-index-scale-expr)
// 2. seg : *(disp) (base-index-scale-expr)
// 3. seg : *(base-index-scale-expr)
// 4. disp *(base-index-scale-expr)
// 5. *(disp) (base-index-scale-expr)
// 6. *(base-index-scale-expr)
// 7. disp *
// 8. *(disp)
// If we do not have a displacement yet, check if we're in cases 4 or 6 by
// checking if the first object after the parenthesis is a register (or an
// identifier referring to a register) and parse the displacement or default
// to 0 as appropriate.
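// For instance, the lambda below returns true at "(%rax)" or "(,%rbx,8)",
// but false at "(3+4)(%rax)", where the '(' begins a displacement
// expression instead.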
auto isAtMemOperand = [this]() {
if (this->getLexer().isNot(AsmToken::LParen))
return false;
AsmToken Buf[2];
StringRef Id;
auto TokCount = this->getLexer().peekTokens(Buf, true);
if (TokCount == 0)
return false;
switch (Buf[0].getKind()) {
case AsmToken::Percent:
case AsmToken::Comma:
return true;
// The cases below are effectively peeking at an identifier.
case AsmToken::At:
case AsmToken::Dollar:
if ((TokCount > 1) &&
(Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
(Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
Id = StringRef(Buf[0].getLoc().getPointer(),
Buf[1].getIdentifier().size() + 1);
break;
case AsmToken::Identifier:
case AsmToken::String:
Id = Buf[0].getIdentifier();
break;
default:
return false;
}
// We have an ID. Check if it is bound to a register.
if (!Id.empty()) {
MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
if (Sym->isVariable()) {
auto V = Sym->getVariableValue(/*SetUsed*/ false);
return isa<X86MCExpr>(V);
}
}
return false;
};
if (!Disp) {
// Parse immediate if we're not at a mem operand yet.
if (!isAtMemOperand()) {
if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
return nullptr;
assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
} else {
// Disp is implicitly zero if we haven't parsed it yet.
Disp = MCConstantExpr::create(0, Parser.getContext());
}
}
// We are now either at the end of the operand or at the '(' at the start of a
// base-index-scale-expr.
if (!parseOptionalToken(AsmToken::LParen)) {
if (SegReg == 0)
return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
StartLoc, EndLoc);
}
// If we reached here, the '(' has already been consumed. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
SMLoc BaseLoc = getLexer().getLoc();
const MCExpr *E;
StringRef ErrMsg;
// Parse BaseReg if one is provided.
if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
if (Parser.parseExpression(E, EndLoc) ||
check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
return nullptr;
// Sanity check register.
BaseReg = cast<X86MCExpr>(E)->getRegNo();
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
return ErrorOperand(BaseLoc,
"eiz and riz can only be used as index registers",
SMRange(BaseLoc, EndLoc));
}
if (parseOptionalToken(AsmToken::Comma)) {
// Following the comma we should have either an index register, or a scale
// value. We don't support the latter form, but we want to parse it
// correctly.
//
// Even though it would be completely consistent to support syntax like
// "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().isNot(AsmToken::RParen)) {
if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
return nullptr;
if (!isa<X86MCExpr>(E)) {
// We've parsed an unexpected Scale Value instead of an index
// register. Interpret it as an absolute.
int64_t ScaleVal;
if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
return ErrorOperand(Loc, "expected absolute expression");
if (ScaleVal != 1)
Warning(Loc, "scale factor without index register is ignored");
Scale = 1;
} else { // IndexReg Found.
IndexReg = cast<X86MCExpr>(E)->getRegNo();
if (BaseReg == X86::RIP)
return ErrorOperand(
Loc, "%rip as base register can not have an index register");
if (IndexReg == X86::RIP)
return ErrorOperand(Loc, "%rip is not allowed as an index register");
if (parseOptionalToken(AsmToken::Comma)) {
// Parse the scale amount:
// ::= ',' [scale-expression]
// A scale amount without an index is ignored.
if (getLexer().isNot(AsmToken::RParen)) {
int64_t ScaleVal;
if (Parser.parseTokenLoc(Loc) ||
Parser.parseAbsoluteExpression(ScaleVal))
return ErrorOperand(Loc, "expected scale expression");
Scale = (unsigned)ScaleVal;
// Validate the scale amount.
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
Scale != 1)
return ErrorOperand(Loc,
"scale factor in 16-bit address must be 1");
if (checkScale(Scale, ErrMsg))
return ErrorOperand(Loc, ErrMsg);
}
}
}
}
}
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
return nullptr;
// This is to support the otherwise illegal operand (%dx) found in various
// unofficial manual examples (e.g. "out[s]?[bwl]? %al, (%dx)"), which must
// be supported. Mark such DX variants separately; they are fixed up only
// in special cases.
if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
ErrMsg))
return ErrorOperand(BaseLoc, ErrMsg);
if (SegReg || BaseReg || IndexReg)
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
IndexReg, Scale, StartLoc, EndLoc);
return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
}
// Parse either a standard primary expression or a register.
bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
// See if this is a register first.
if (getTok().is(AsmToken::Percent) ||
(isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
MatchRegisterName(Parser.getTok().getString()))) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned RegNo;
if (ParseRegister(RegNo, StartLoc, EndLoc))
return true;
Res = X86MCExpr::create(RegNo, Parser.getContext());
return false;
}
return Parser.parsePrimaryExpr(Res, EndLoc);
}
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
InstInfo = &Info;
// Reset the forced VEX encoding.
ForcedVEXEncoding = VEXEncoding_Default;
// Parse pseudo prefixes.
while (1) {
if (Name == "{") {
if (getLexer().isNot(AsmToken::Identifier))
return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
std::string Prefix = Parser.getTok().getString().lower();
Parser.Lex(); // Eat identifier.
if (getLexer().isNot(AsmToken::RCurly))
return Error(Parser.getTok().getLoc(), "Expected '}'");
Parser.Lex(); // Eat curly.
if (Prefix == "vex2")
ForcedVEXEncoding = VEXEncoding_VEX2;
else if (Prefix == "vex3")
ForcedVEXEncoding = VEXEncoding_VEX3;
else if (Prefix == "evex")
ForcedVEXEncoding = VEXEncoding_EVEX;
else
return Error(NameLoc, "unknown prefix");
NameLoc = Parser.getTok().getLoc();
if (getLexer().is(AsmToken::LCurly)) {
Parser.Lex();
Name = "{";
} else {
if (getLexer().isNot(AsmToken::Identifier))
return Error(Parser.getTok().getLoc(), "Expected identifier");
// FIXME: The mnemonic won't match correctly if it's not in lower case.
Name = Parser.getTok().getString();
Parser.Lex();
}
continue;
}
break;
}
StringRef PatchedName = Name;
// Hack to skip "short" following Jcc.
if (isParsingIntelSyntax() &&
(PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
PatchedName == "jcxz" || PatchedName == "jexcz" ||
(PatchedName.startswith("j") &&
ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
StringRef NextTok = Parser.getTok().getString();
if (NextTok == "short") {
SMLoc NameEndLoc =
NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
// Eat the short keyword.
Parser.Lex();
// MS and GAS ignore the short keyword; they both determine the jmp type
// based on the distance of the label. (NASM does emit different code with
// and without "short," though.)
InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
NextTok.size() + 1);
}
}
// FIXME: Hack to recognize setneb as setne.
if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
PatchedName != "setb" && PatchedName != "setnb")
PatchedName = PatchedName.substr(0, Name.size()-1);
unsigned ComparisonPredicate = ~0U;
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
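// For example, "vcmpnltss" is rewritten below to "vcmpss" with comparison
// predicate 0x05, and "cmpeqps" to "cmpps" with predicate 0x00.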
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned CCIdx = IsVCMP ? 4 : 3;
unsigned CC = StringSwitch<unsigned>(
PatchedName.slice(CCIdx, PatchedName.size() - 2))
.Case("eq", 0x00)
.Case("eq_oq", 0x00)
.Case("lt", 0x01)
.Case("lt_os", 0x01)
.Case("le", 0x02)
.Case("le_os", 0x02)
.Case("unord", 0x03)
.Case("unord_q", 0x03)
.Case("neq", 0x04)
.Case("neq_uq", 0x04)
.Case("nlt", 0x05)
.Case("nlt_us", 0x05)
.Case("nle", 0x06)
.Case("nle_us", 0x06)
.Case("ord", 0x07)
.Case("ord_q", 0x07)
/* AVX only from here */
.Case("eq_uq", 0x08)
.Case("nge", 0x09)
.Case("nge_us", 0x09)
.Case("ngt", 0x0A)
.Case("ngt_us", 0x0A)
.Case("false", 0x0B)
.Case("false_oq", 0x0B)
.Case("neq_oq", 0x0C)
.Case("ge", 0x0D)
.Case("ge_os", 0x0D)
.Case("gt", 0x0E)
.Case("gt_os", 0x0E)
.Case("true", 0x0F)
.Case("true_uq", 0x0F)
.Case("eq_os", 0x10)
.Case("lt_oq", 0x11)
.Case("le_oq", 0x12)
.Case("unord_s", 0x13)
.Case("neq_us", 0x14)
.Case("nlt_uq", 0x15)
.Case("nle_uq", 0x16)
.Case("ord_s", 0x17)
.Case("eq_us", 0x18)
.Case("nge_uq", 0x19)
.Case("ngt_uq", 0x1A)
.Case("false_os", 0x1B)
.Case("neq_os", 0x1C)
.Case("ge_oq", 0x1D)
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
if (CC != ~0U && (IsVCMP || CC < 8)) {
if (PatchedName.endswith("ss"))
PatchedName = IsVCMP ? "vcmpss" : "cmpss";
else if (PatchedName.endswith("sd"))
PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
else if (PatchedName.endswith("ps"))
PatchedName = IsVCMP ? "vcmpps" : "cmpps";
else if (PatchedName.endswith("pd"))
PatchedName = IsVCMP ? "vcmppd" : "cmppd";
else
llvm_unreachable("Unexpected suffix!");
ComparisonPredicate = CC;
}
}
// FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
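// For example, "vpcmpleud" is parsed as "vpcmpud" with immediate 0x2;
// signed "eq" forms are left alone since real instructions exist for them.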
if (PatchedName.startswith("vpcmp") &&
(PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
unsigned CC = StringSwitch<unsigned>(
PatchedName.slice(5, PatchedName.size() - SuffixSize))
.Case("eq", 0x0) // Only allowed on unsigned. Checked below.
.Case("lt", 0x1)
.Case("le", 0x2)
//.Case("false", 0x3) // Not a documented alias.
.Case("neq", 0x4)
.Case("nlt", 0x5)
.Case("nle", 0x6)
//.Case("true", 0x7) // Not a documented alias.
.Default(~0U);
if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
switch (PatchedName.back()) {
default: llvm_unreachable("Unexpected character!");
case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
}
// Set up the immediate to push into the operands later.
ComparisonPredicate = CC;
}
}
// FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
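// For example, "vpcomgtb" is parsed as "vpcomb" with immediate 0x2, and
// "vpcomtrueuw" as "vpcomuw" with immediate 0x7.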
if (PatchedName.startswith("vpcom") &&
(PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
unsigned CC = StringSwitch<unsigned>(
PatchedName.slice(5, PatchedName.size() - SuffixSize))
.Case("lt", 0x0)
.Case("le", 0x1)
.Case("gt", 0x2)
.Case("ge", 0x3)
.Case("eq", 0x4)
.Case("neq", 0x5)
.Case("false", 0x6)
.Case("true", 0x7)
.Default(~0U);
if (CC != ~0U) {
switch (PatchedName.back()) {
default: llvm_unreachable("Unexpected character!");
case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
}
// Set up the immediate to push into the operands later.
ComparisonPredicate = CC;
}
}
// Determine whether this is an instruction prefix.
// FIXME:
// Enhance the robustness of the prefix integrity checks. For example, the
// following forms are currently tolerated:
// repz repnz <insn> ; GAS errors for the use of two similar prefixes
// lock addq %rax, %rbx ; Destination operand must be of memory type
// xacquire <insn> ; xacquire must be accompanied by 'lock'
bool isPrefix = StringSwitch<bool>(Name)
.Cases("rex64", "data32", "data16", true)
.Cases("xacquire", "xrelease", true)
.Cases("acquire", "release", isParsingIntelSyntax())
.Default(false);
auto isLockRepeatNtPrefix = [](StringRef N) {
return StringSwitch<bool>(N)
.Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
.Default(false);
};
bool CurlyAsEndOfStatement = false;
unsigned Flags = X86::IP_NO_PREFIX;
while (isLockRepeatNtPrefix(Name.lower())) {
unsigned Prefix =
StringSwitch<unsigned>(Name)
.Cases("lock", "lock", X86::IP_HAS_LOCK)
.Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
.Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
.Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
.Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
Flags |= Prefix;
if (getLexer().is(AsmToken::EndOfStatement)) {
// We don't have a real instruction with the given prefix;
// let's use the prefix as the instruction.
// TODO: there could be several prefixes one after another.
Flags = X86::IP_NO_PREFIX;
break;
}
// FIXME: The mnemonic won't match correctly if it's not in lower case.
Name = Parser.getTok().getString();
Parser.Lex(); // eat the prefix
// Hack: we could have something like "rep # some comment" or
// "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
while (Name.startswith(";") || Name.startswith("\n") ||
Name.startswith("#") || Name.startswith("\t") ||
Name.startswith("/")) {
// FIXME: The mnemonic won't match correctly if it's not in lower case.
Name = Parser.getTok().getString();
Parser.Lex(); // go to next prefix or instr
}
}
if (Flags)
PatchedName = Name;
// Hacks to handle 'data16' and 'data32'
if (PatchedName == "data16" && is16BitMode()) {
return Error(NameLoc, "redundant data16 prefix");
}
if (PatchedName == "data32") {
if (is32BitMode())
return Error(NameLoc, "redundant data32 prefix");
if (is64BitMode())
return Error(NameLoc, "'data32' is not supported in 64-bit mode");
// Hack: use 'data16' for the table lookup.
PatchedName = "data16";
}
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// Push the immediate if we extracted one from the mnemonic.
if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
getParser().getContext());
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
}
// This does the actual operand parsing. Don't parse any more if we have a
// prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
// just want to parse the "lock" as the first instruction and the "incl" as
// the next one.
if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star))
Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
// Read the operands.
while(1) {
if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
Operands.push_back(std::move(Op));
if (HandleAVX512Operand(Operands, *Operands.back()))
return true;
} else {
return true;
}
// check for comma and eat it
if (getLexer().is(AsmToken::Comma))
Parser.Lex();
else
break;
}
// In MS inline asm curly braces mark the beginning/end of a block,
// therefore they should be interpreted as end of statement
CurlyAsEndOfStatement =
isParsingIntelSyntax() && isParsingInlineAsm() &&
(getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
return TokError("unexpected token in argument list");
}
// Push the immediate if we extracted one from the mnemonic.
if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
getParser().getContext());
Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
}
// Consume the EndOfStatement or the prefix separator Slash
if (getLexer().is(AsmToken::EndOfStatement) ||
(isPrefix && getLexer().is(AsmToken::Slash)))
Parser.Lex();
else if (CurlyAsEndOfStatement)
// Add an actual EndOfStatement before the curly brace
Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
getLexer().getTok().getLoc(), 0);
// This is for gas compatibility and cannot be done in td.
// Adding "p" for some floating point with no argument.
// For example: fsub --> fsubp
bool IsFp =
Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
if (IsFp && Operands.size() == 1) {
const char *Repl = StringSwitch<const char *>(Name)
.Case("fsub", "fsubp")
.Case("fdiv", "fdivp")
.Case("fsubr", "fsubrp")
.Case("fdivr", "fdivrp");
static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
}
if ((Name == "mov" || Name == "movw" || Name == "movl") &&
(Operands.size() == 3)) {
X86Operand &Op1 = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];
SMLoc Loc = Op1.getEndLoc();
// Moving a 32-bit or 16-bit value into a segment register has the same
// behavior. Modify such instructions to always take the shorter form.
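// For instance, in 32-bit mode "movw %ax, %ds" is re-emitted as
// "movl %eax, %ds", avoiding the 0x66 operand-size prefix.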
if (Op1.isReg() && Op2.isReg() &&
X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
Op2.getReg()) &&
(X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
// Change instruction name to match new instruction.
if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
Name = is16BitMode() ? "movw" : "movl";
Operands[0] = X86Operand::CreateToken(Name, NameLoc);
}
// Select the correct equivalent 16-/32-bit source register.
unsigned Reg =
getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
}
}
// This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
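// For example, "outb %al, (%dx)" becomes "outb %al, %dx"; the matching "in"
// forms are rewritten in the block below.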
if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands.back();
if (Op.isDXReg())
Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
Op.getEndLoc());
}
// Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands[1];
if (Op.isDXReg())
Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
Op.getEndLoc());
}
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
bool HadVerifyError = false;
// Append default arguments to "ins[bwld]"
if (Name.startswith("ins") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
Name == "ins")) {
AddDefaultSrcDestOperands(TmpOperands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Append default arguments to "outs[bwld]"
if (Name.startswith("outs") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" || Name == "outs")) {
AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
// values of $SIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
if (Name.startswith("lods") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
// values of $DIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
if (Name.startswith("stos") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "stos" || Name == "stosb" || Name == "stosw" ||
Name == "stosl" || Name == "stosd" || Name == "stosq")) {
TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
// values of $DIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
if (Name.startswith("scas") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "scas" || Name == "scasb" || Name == "scasw" ||
Name == "scasl" || Name == "scasd" || Name == "scasq")) {
TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Add default SI and DI operands to "cmps[bwlq]".
if (Name.startswith("cmps") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
DefaultMemSIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Add default SI and DI operands to "movs[bwlq]".
if (((Name.startswith("movs") &&
(Name == "movs" || Name == "movsb" || Name == "movsw" ||
Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
(Name.startswith("smov") &&
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
(Operands.size() == 1 || Operands.size() == 3)) {
if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Check if we encountered an error for one of the string instructions
if (HadVerifyError) {
return HadVerifyError;
}
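// At this point any bare string instruction has received its implicit
// operands; e.g. in 64-bit mode "movsb" behaves as "movsb (%rsi), (%rdi)".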
// FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// "shift <op>".
if ((Name.startswith("shr") || Name.startswith("sar") ||
Name.startswith("shl") || Name.startswith("sal") ||
Name.startswith("rcl") || Name.startswith("rcr") ||
Name.startswith("rol") || Name.startswith("ror")) &&
Operands.size() == 3) {
if (isParsingIntelSyntax()) {
// Intel syntax
X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
Operands.pop_back();
} else {
X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
Operands.erase(Operands.begin() + 1);
}
}
// Transforms "int $3" into "int3" as a size optimization. We can't write an
// instalias with an immediate operand yet.
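// The rewrite selects the one-byte 0xCC breakpoint encoding over the
// two-byte 0xCD 0x03 form.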
if (Name == "int" && Operands.size() == 2) {
X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
if (Op1.isImm())
if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
if (CE->getValue() == 3) {
Operands.erase(Operands.begin() + 1);
static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
}
}
// Transforms "xlat mem8" into "xlatb"
if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
if (Op1.isMem8()) {
Warning(Op1.getStartLoc(), "memory operand is only for determining the "
"size, (R|E)BX will be used for the location");
Operands.pop_back();
static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
}
}
if (Flags)
Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
return false;
}
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
const MCRegisterInfo *MRI = getContext().getRegisterInfo();
switch (Inst.getOpcode()) {
default: return false;
case X86::VMOVZPQILo2PQIrr:
case X86::VMOVAPDrr:
case X86::VMOVAPDYrr:
case X86::VMOVAPSrr:
case X86::VMOVAPSYrr:
case X86::VMOVDQArr:
case X86::VMOVDQAYrr:
case X86::VMOVDQUrr:
case X86::VMOVDQUYrr:
case X86::VMOVUPDrr:
case X86::VMOVUPDYrr:
case X86::VMOVUPSrr:
case X86::VMOVUPSYrr: {
// We can get a smaller encoding by using VEX.R instead of VEX.B if one of
// the registers is extended, but the other isn't.
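// For example, "vmovaps %xmm8, %xmm0" would need VEX.B (a 3-byte prefix) to
// encode %xmm8 in ModRM.rm; the _REV form moves %xmm8 into ModRM.reg, where
// VEX.R still fits in the 2-byte prefix.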
if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
return false;
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
}
Inst.setOpcode(NewOpc);
return true;
}
case X86::VMOVSDrr:
case X86::VMOVSSrr: {
// We can get a smaller encoding by using VEX.R instead of VEX.B if one of
// the registers is extended, but the other isn't.
if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
return false;
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
}
Inst.setOpcode(NewOpc);
return true;
}
}
}
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
const MCRegisterInfo *MRI = getContext().getRegisterInfo();
switch (Inst.getOpcode()) {
case X86::VGATHERDPDYrm:
case X86::VGATHERDPDrm:
case X86::VGATHERDPSYrm:
case X86::VGATHERDPSrm:
case X86::VGATHERQPDYrm:
case X86::VGATHERQPDrm:
case X86::VGATHERQPSYrm:
case X86::VGATHERQPSrm:
case X86::VPGATHERDDYrm:
case X86::VPGATHERDDrm:
case X86::VPGATHERDQYrm:
case X86::VPGATHERDQrm:
case X86::VPGATHERQDYrm:
case X86::VPGATHERQDrm:
case X86::VPGATHERQQYrm:
case X86::VPGATHERQQrm: {
unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
unsigned Index =
MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
if (Dest == Mask || Dest == Index || Mask == Index)
return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
"registers should be distinct");
break;
}
case X86::VGATHERDPDZ128rm:
case X86::VGATHERDPDZ256rm:
case X86::VGATHERDPDZrm:
case X86::VGATHERDPSZ128rm:
case X86::VGATHERDPSZ256rm:
case X86::VGATHERDPSZrm:
case X86::VGATHERQPDZ128rm:
case X86::VGATHERQPDZ256rm:
case X86::VGATHERQPDZrm:
case X86::VGATHERQPSZ128rm:
case X86::VGATHERQPSZ256rm:
case X86::VGATHERQPSZrm:
case X86::VPGATHERDDZ128rm:
case X86::VPGATHERDDZ256rm:
case X86::VPGATHERDDZrm:
case X86::VPGATHERDQZ128rm:
case X86::VPGATHERDQZ256rm:
case X86::VPGATHERDQZrm:
case X86::VPGATHERQDZ128rm:
case X86::VPGATHERQDZ256rm:
case X86::VPGATHERQDZrm:
case X86::VPGATHERQQZ128rm:
case X86::VPGATHERQQZ256rm:
case X86::VPGATHERQQZrm: {
unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
unsigned Index =
MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
if (Dest == Index)
return Warning(Ops[0]->getStartLoc(), "index and destination registers "
"should be distinct");
break;
}
case X86::V4FMADDPSrm:
case X86::V4FMADDPSrmk:
case X86::V4FMADDPSrmkz:
case X86::V4FMADDSSrm:
case X86::V4FMADDSSrmk:
case X86::V4FMADDSSrmkz:
case X86::V4FNMADDPSrm:
case X86::V4FNMADDPSrmk:
case X86::V4FNMADDPSrmkz:
case X86::V4FNMADDSSrm:
case X86::V4FNMADDSSrmk:
case X86::V4FNMADDSSrmkz:
case X86::VP4DPWSSDSrm:
case X86::VP4DPWSSDSrmk:
case X86::VP4DPWSSDSrmkz:
case X86::VP4DPWSSDrm:
case X86::VP4DPWSSDrmk:
case X86::VP4DPWSSDrmkz: {
unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
X86::AddrNumOperands - 1).getReg();
unsigned Src2Enc = MRI->getEncodingValue(Src2);
if (Src2Enc % 4 != 0) {
StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
unsigned GroupStart = (Src2Enc / 4) * 4;
unsigned GroupEnd = GroupStart + 3;
return Warning(Ops[0]->getStartLoc(),
"source register '" + RegName + "' implicitly denotes '" +
RegName.take_front(3) + Twine(GroupStart) + "' to '" +
RegName.take_front(3) + Twine(GroupEnd) +
"' source group");
}
break;
}
}
return false;
}
static const char *getSubtargetFeatureName(uint64_t Val);
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
MCStreamer &Out) {
Out.EmitInstruction(Inst, getSTI());
}
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
if (isParsingIntelSyntax())
return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
MatchingInlineAsm);
return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
MatchingInlineAsm);
}
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
OperandVector &Operands, MCStreamer &Out,
bool MatchingInlineAsm) {
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
const char *Repl = StringSwitch<const char *>(Op.getToken())
.Case("finit", "fninit")
.Case("fsave", "fnsave")
.Case("fstcw", "fnstcw")
.Case("fstcww", "fnstcw")
.Case("fstenv", "fnstenv")
.Case("fstsw", "fnstsw")
.Case("fstsww", "fnstsw")
.Case("fclex", "fnclex")
.Default(nullptr);
if (Repl) {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
}
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
const FeatureBitset &MissingFeatures,
bool MatchingInlineAsm) {
assert(MissingFeatures.any() && "Unknown missing feature!");
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
OS << "instruction requires:";
for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
if (MissingFeatures[i])
OS << ' ' << getSubtargetFeatureName(i);
}
return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}
static unsigned getPrefixes(OperandVector &Operands) {
unsigned Result = 0;
X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
if (Prefix.isPrefix()) {
Result = Prefix.getPrefix();
Operands.pop_back();
}
return Result;
}
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
unsigned Opc = Inst.getOpcode();
const MCInstrDesc &MCID = MII.get(Opc);
if (ForcedVEXEncoding == VEXEncoding_EVEX &&
(MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
return Match_Unsupported;
if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
ForcedVEXEncoding == VEXEncoding_VEX3) &&
(MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
return Match_Unsupported;
// These instructions match ambiguously with their VEX encoded counterparts
// and appear first in the matching table. Reject them unless we're forcing
// EVEX encoding.
// FIXME: We really need a way to break the ambiguity.
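// For example, writing "{evex} vcvttsd2si (%rax), %ecx" is intended to be
// the only way to select the EVEX-encoded form of that load.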
switch (Opc) {
case X86::VCVTSD2SIZrm_Int:
case X86::VCVTSD2SI64Zrm_Int:
case X86::VCVTSS2SIZrm_Int:
case X86::VCVTSS2SI64Zrm_Int:
case X86::VCVTTSD2SIZrm: case X86::VCVTTSD2SIZrm_Int:
case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
case X86::VCVTTSS2SIZrm: case X86::VCVTTSS2SIZrm_Int:
case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
if (ForcedVEXEncoding != VEXEncoding_EVEX)
return Match_Unsupported;
}
return Match_Success;
}
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpected empty operand list!");
assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
SMRange EmptyRange = None;
// First, handle aliases that expand to multiple instructions.
MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
Out, MatchingInlineAsm);
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
unsigned Prefixes = getPrefixes(Operands);
MCInst Inst;
// If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
// encoder.
if (ForcedVEXEncoding == VEXEncoding_VEX3)
Prefixes |= X86::IP_USE_VEX3;
if (Prefixes)
Inst.setFlags(Prefixes);
// First, try a direct match.
FeatureBitset MissingFeatures;
unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
MissingFeatures, MatchingInlineAsm,
isParsingIntelSyntax());
switch (OriginalError) {
default: llvm_unreachable("Unexpected match result!");
case Match_Success:
if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
return true;
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
if (!MatchingInlineAsm)
while (processInstruction(Inst, Operands))
;
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
case Match_InvalidImmUnsignedi4: {
SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
EmptyRange, MatchingInlineAsm);
}
case Match_MissingFeature:
return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
case Match_InvalidOperand:
case Match_MnemonicFail:
case Match_Unsupported:
break;
}
if (Op.getToken().empty()) {
Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
MatchingInlineAsm);
return true;
}
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
// type. However, that requires substantially more matcher support than the
// following hack.
// Change the operand to point to a temporary token.
StringRef Base = Op.getToken();
SmallString<16> Tmp;
Tmp += Base;
Tmp += ' ';
Op.setTokenValue(Tmp);
// If this instruction starts with an 'f', then it is a floating point stack
// instruction. These come in up to three forms for 32-bit, 64-bit, and
// 80-bit floating point, which use the suffixes s,l,t respectively.
//
// Otherwise, we assume that this may be an integer instruction, which comes
// in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
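  // E.g. an unsuffixed "add $1, (%rax)" is retried as addb/addw/addl/addq,
  // and an unsuffixed "fld (%rax)" as flds/fldl/fldt.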
// Check for the various suffix matches.
uint64_t ErrorInfoIgnore;
FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
unsigned Match[4];
for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
Tmp.back() = Suffixes[I];
Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
MissingFeatures, MatchingInlineAsm,
isParsingIntelSyntax());
// If this returned as a missing feature failure, remember that.
if (Match[I] == Match_MissingFeature)
ErrorInfoMissingFeatures = MissingFeatures;
}
// Restore the old token.
Op.setTokenValue(Base);
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
unsigned NumSuccessfulMatches =
std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
}
// Otherwise, the match failed, try to produce a decent error message.
// If we had multiple suffix matches, then identify this as an ambiguous
// match.
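  // E.g. an unsuffixed "inc (%rax)" matches incb/incw/incl/incq and is
  // diagnosed as "ambiguous instructions require an explicit suffix".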
if (NumSuccessfulMatches > 1) {
char MatchChars[4];
unsigned NumMatches = 0;
for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
if (Match[I] == Match_Success)
MatchChars[NumMatches++] = Suffixes[I];
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
OS << "ambiguous instructions require an explicit suffix (could be ";
for (unsigned i = 0; i != NumMatches; ++i) {
if (i != 0)
OS << ", ";
if (i + 1 == NumMatches)
OS << "or ";
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
return true;
}
// Okay, we know that none of the variants matched successfully.
// If all of the instructions reported an invalid mnemonic, then the original
// mnemonic was invalid.
if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
if (OriginalError == Match_MnemonicFail)
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
Op.getLocRange(), MatchingInlineAsm);
if (OriginalError == Match_Unsupported)
return Error(IDLoc, "unsupported instruction", EmptyRange,
MatchingInlineAsm);
assert(OriginalError == Match_InvalidOperand && "Unexpected error");
// Recover location info for the operand if we know which was the problem.
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction", EmptyRange,
MatchingInlineAsm);
X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
if (Operand.getStartLoc().isValid()) {
SMRange OperandRange = Operand.getLocRange();
return Error(Operand.getStartLoc(), "invalid operand for instruction",
OperandRange, MatchingInlineAsm);
}
}
return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
// If one instruction matched as unsupported, report this as unsupported.
if (std::count(std::begin(Match), std::end(Match),
Match_Unsupported) == 1) {
return Error(IDLoc, "unsupported instruction", EmptyRange,
MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
if (std::count(std::begin(Match), std::end(Match),
Match_MissingFeature) == 1) {
ErrorInfo = Match_MissingFeature;
return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
// operand failure.
if (std::count(std::begin(Match), std::end(Match),
Match_InvalidOperand) == 1) {
return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
EmptyRange, MatchingInlineAsm);
return true;
}
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpected empty operand list!");
  assert((*Operands[0]).isToken() &&
         "Leading operand should always be a mnemonic!");
StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
SMRange EmptyRange = None;
StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
unsigned Prefixes = getPrefixes(Operands);
// First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
MCInst Inst;
// If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
// encoder.
if (ForcedVEXEncoding == VEXEncoding_VEX3)
Prefixes |= X86::IP_USE_VEX3;
if (Prefixes)
Inst.setFlags(Prefixes);
// Find one unsized memory operand, if present.
X86Operand *UnsizedMemOp = nullptr;
for (const auto &Op : Operands) {
X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
if (X86Op->isMemUnsized()) {
UnsizedMemOp = X86Op;
      // We found an unqualified memory operand; stop looking, since Intel
      // assembly allows at most one memory operand per instruction.
break;
}
}
// Allow some instructions to have implicitly pointer-sized operands. This is
// compatible with gas.
if (UnsizedMemOp) {
static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
for (const char *Instr : PtrSizedInstrs) {
if (Mnemonic == Instr) {
UnsizedMemOp->Mem.Size = getPointerWidth();
break;
}
}
}
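  // E.g. in 64-bit mode, "call [rax]" gets a 64-bit operand size, as if it
  // had been written "call qword ptr [rax]".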
SmallVector<unsigned, 8> Match;
FeatureBitset ErrorInfoMissingFeatures;
FeatureBitset MissingFeatures;
  // If an unsized "push" has an immediate operand, we default the operand
  // size to the pointer width.
if (Mnemonic == "push" && Operands.size() == 2) {
auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
if (X86Op->isImm()) {
      // If it's not a constant, fall through and let the matching below take
      // care of it.
const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
unsigned Size = getPointerWidth();
if (CE &&
(isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
SmallString<16> Tmp;
Tmp += Base;
Tmp += (is64BitMode())
? "q"
: (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
Op.setTokenValue(Tmp);
// Do match in ATT mode to allow explicit suffix usage.
Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
MissingFeatures, MatchingInlineAsm,
false /*isParsingIntelSyntax()*/));
Op.setTokenValue(Base);
}
}
}
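  // E.g. "push 2" in 64-bit mode is matched as if it were written "pushq $2".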
// If an unsized memory operand is present, try to match with each memory
// operand size. In Intel assembly, the size is not part of the instruction
// mnemonic.
if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
for (unsigned Size : MopSizes) {
UnsizedMemOp->Mem.Size = Size;
uint64_t ErrorInfoIgnore;
unsigned LastOpcode = Inst.getOpcode();
unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
MissingFeatures, MatchingInlineAsm,
isParsingIntelSyntax());
if (Match.empty() || LastOpcode != Inst.getOpcode())
Match.push_back(M);
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
ErrorInfoMissingFeatures = MissingFeatures;
}
// Restore the size of the unsized memory operand if we modified it.
UnsizedMemOp->Mem.Size = 0;
}
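  // E.g. Intel "inc [rax]" matches above with a distinct opcode for each of
  // the 8-, 16-, 32- and 64-bit sizes, and is diagnosed as ambiguous below.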
// If we haven't matched anything yet, this is not a basic integer or FPU
// operation. There shouldn't be any ambiguity in our mnemonic table, so try
// matching with the unsized operand.
if (Match.empty()) {
Match.push_back(MatchInstruction(
Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
isParsingIntelSyntax()));
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
ErrorInfoMissingFeatures = MissingFeatures;
}
// Restore the size of the unsized memory operand if we modified it.
if (UnsizedMemOp)
UnsizedMemOp->Mem.Size = 0;
// If it's a bad mnemonic, all results will be the same.
if (Match.back() == Match_MnemonicFail) {
return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
Op.getLocRange(), MatchingInlineAsm);
}
unsigned NumSuccessfulMatches =
std::count(std::begin(Match), std::end(Match), Match_Success);
// If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movzx eax, m8/m16".
if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
UnsizedMemOp->getMemFrontendSize()) {
UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
unsigned M = MatchInstruction(
Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
isParsingIntelSyntax());
if (M == Match_Success)
NumSuccessfulMatches = 1;
// Add a rewrite that encodes the size information we used from the
// frontend.
InstInfo->AsmRewrites->emplace_back(
AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
/*Len=*/0, UnsizedMemOp->getMemFrontendSize());
}
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
if (NumSuccessfulMatches == 1) {
if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
return true;
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the individual
// transformations can chain off each other.
if (!MatchingInlineAsm)
while (processInstruction(Inst, Operands))
;
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
} else if (NumSuccessfulMatches > 1) {
assert(UnsizedMemOp &&
"multiple matches only possible with unsized memory operands");
return Error(UnsizedMemOp->getStartLoc(),
"ambiguous operand size for instruction '" + Mnemonic + "\'",
UnsizedMemOp->getLocRange());
}
// If one instruction matched as unsupported, report this as unsupported.
if (std::count(std::begin(Match), std::end(Match),
Match_Unsupported) == 1) {
return Error(IDLoc, "unsupported instruction", EmptyRange,
MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
if (std::count(std::begin(Match), std::end(Match),
Match_MissingFeature) == 1) {
ErrorInfo = Match_MissingFeature;
return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
// operand failure.
if (std::count(std::begin(Match), std::end(Match),
Match_InvalidOperand) == 1) {
return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
if (std::count(std::begin(Match), std::end(Match),
Match_InvalidImmUnsignedi4) == 1) {
SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
EmptyRange, MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
MatchingInlineAsm);
}
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
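  // Segment registers named as clobbers in MS inline asm are dropped here;
  // they are reserved registers, so listing them as clobbered is unnecessary.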
return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
else if (Parser.getTok().getString() == "noprefix")
return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
"supported: registers must have a "
"'%' prefix in .att_syntax");
}
getParser().setAssemblerDialect(0);
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
else if (Parser.getTok().getString() == "prefix")
return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
"supported: registers must not have "
"a '%' prefix in .intel_syntax");
}
return false;
} else if (IDVal == ".even")
return parseDirectiveEven(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_proc")
return parseDirectiveFPOProc(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_setframe")
return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_pushreg")
return parseDirectiveFPOPushReg(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_stackalloc")
return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_stackalign")
return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endprologue")
return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endproc")
return parseDirectiveFPOEndProc(DirectiveID.getLoc());
return true;
}
/// parseDirectiveEven
/// ::= .even
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
return false;
const MCSection *Section = getStreamer().getCurrentSectionOnly();
if (!Section) {
getStreamer().InitSections(false);
Section = getStreamer().getCurrentSectionOnly();
}
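  // Sections holding code are padded to the boundary with nops; other
  // sections get zero fill (value 0, one byte at a time, no emission limit).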
if (Section->UseCodeAlign())
getStreamer().EmitCodeAlignment(2, 0);
else
getStreamer().EmitValueToAlignment(2, 0, 1, 0);
return false;
}
/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
MCAsmParser &Parser = getParser();
Code16GCC = false;
if (IDVal == ".code16") {
Parser.Lex();
if (!is16BitMode()) {
SwitchMode(X86::Mode16Bit);
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
}
} else if (IDVal == ".code16gcc") {
// .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
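    // 32-bit forms such as "movl %eax, %ebx" still assemble; the encoder
    // emits them with operand-size prefixes for 16-bit mode.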
Parser.Lex();
Code16GCC = true;
if (!is16BitMode()) {
SwitchMode(X86::Mode16Bit);
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
}
} else if (IDVal == ".code32") {
Parser.Lex();
if (!is32BitMode()) {
SwitchMode(X86::Mode32Bit);
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
}
} else if (IDVal == ".code64") {
Parser.Lex();
if (!is64BitMode()) {
SwitchMode(X86::Mode64Bit);
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
}
} else {
Error(L, "unknown directive " + IDVal);
return false;
}
return false;
}
// .cv_fpo_proc foo
bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
MCAsmParser &Parser = getParser();
StringRef ProcName;
int64_t ParamsSize;
if (Parser.parseIdentifier(ProcName))
return Parser.TokError("expected symbol name");
if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
return true;
if (!isUIntN(32, ParamsSize))
return Parser.TokError("parameters size out of range");
if (Parser.parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_proc' directive");
MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
}
// .cv_fpo_setframe ebp
bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
MCAsmParser &Parser = getParser();
unsigned Reg;
SMLoc DummyLoc;
if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
Parser.parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_setframe' directive");
return getTargetStreamer().emitFPOSetFrame(Reg, L);
}
// .cv_fpo_pushreg ebx
bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
MCAsmParser &Parser = getParser();
unsigned Reg;
SMLoc DummyLoc;
if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
Parser.parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
return getTargetStreamer().emitFPOPushReg(Reg, L);
}
// .cv_fpo_stackalloc 20
bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
MCAsmParser &Parser = getParser();
int64_t Offset;
if (Parser.parseIntToken(Offset, "expected offset") ||
Parser.parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}
// .cv_fpo_stackalign 8
bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
MCAsmParser &Parser = getParser();
int64_t Offset;
if (Parser.parseIntToken(Offset, "expected offset") ||
Parser.parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
return getTargetStreamer().emitFPOStackAlign(Offset, L);
}
// .cv_fpo_endprologue
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
MCAsmParser &Parser = getParser();
if (Parser.parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
return getTargetStreamer().emitFPOEndPrologue(L);
}
// .cv_fpo_endproc
bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
MCAsmParser &Parser = getParser();
if (Parser.parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_endproc' directive");
return getTargetStreamer().emitFPOEndProc(L);
}
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"