MC: Split the x86 asm matcher implementations by dialect

The existing matcher has lots of AT&T assembly dialect assumptions baked
into it.  In particular, the hack for resolving the size of a memory
operand by appending the four most common suffixes doesn't work at all.
The Intel assembly dialect mnemonic table has ambiguous entries, so we
need to try matching multiple times with different operand sizes, since
that's the only way to choose different instruction variants.

This makes us more compatible with gas's implementation of Intel
assembly syntax.  MSVC assumes you want byte-sized operations for the
instructions that we reject as ambiguous.

Reviewed By: grosbach

Differential Revision: http://reviews.llvm.org/D4747

llvm-svn: 216481
This commit is contained in:
Reid Kleckner 2014-08-26 20:32:34 +00:00
parent a2a1e53085
commit f6fb780890
5 changed files with 296 additions and 33 deletions

View File

@ -697,6 +697,29 @@ private:
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
MCStreamer &Out, bool MatchingInlineAsm);
bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
bool MatchingInlineAsm);
bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm);
bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm);
unsigned getPointerSize() {
if (is16BitMode()) return 16;
if (is32BitMode()) return 32;
if (is64BitMode()) return 64;
llvm_unreachable("invalid mode");
}
virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
/// doSrcDstMatch - Returns true if operands are matching in their
@ -2309,12 +2332,16 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
assert(Op.isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = None;
if (isParsingIntelSyntax())
return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
MatchingInlineAsm);
return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
MatchingInlineAsm);
}
// First, handle aliases that expand to multiple instructions.
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
OperandVector &Operands, MCStreamer &Out,
bool MatchingInlineAsm) {
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
@ -2336,6 +2363,36 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
EmitInstruction(Inst, Operands, Out);
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
}
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
bool MatchingInlineAsm) {
assert(ErrorInfo && "Unknown missing feature!");
ArrayRef<SMRange> EmptyRanges = None;
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
OS << "instruction requires:";
uint64_t Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
if (ErrorInfo & Mask)
OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
Mask <<= 1;
}
return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
}
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
assert(Op.isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = None;
// First, handle aliases that expand to multiple instructions.
MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
bool WasOriginallyInvalidOperand = false;
MCInst Inst;
@ -2358,21 +2415,8 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
case Match_MissingFeature: {
assert(ErrorInfo && "Unknown missing feature!");
// Special case the error message for the very common case where only
// a single subtarget feature is missing.
std::string Msg = "instruction requires:";
uint64_t Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
if (ErrorInfo & Mask) {
Msg += " ";
Msg += getSubtargetFeatureName(ErrorInfo & Mask);
}
Mask <<= 1;
}
return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
}
case Match_MissingFeature:
return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@ -2490,25 +2534,17 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// missing feature.
if (std::count(std::begin(Match), std::end(Match),
Match_MissingFeature) == 1) {
std::string Msg = "instruction requires:";
uint64_t Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
if (ErrorInfoMissingFeature & Mask) {
Msg += " ";
Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
}
Mask <<= 1;
}
return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
ErrorInfo = ErrorInfoMissingFeature;
return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
// operand failure.
if (std::count(std::begin(Match), std::end(Match),
Match_InvalidOperand) == 1) {
Error(IDLoc, "invalid operand for instruction", EmptyRanges,
MatchingInlineAsm);
return true;
return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
@ -2517,6 +2553,132 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
assert(Op.isToken() && "Leading operand should always be a mnemonic!");
StringRef Mnemonic = Op.getToken();
ArrayRef<SMRange> EmptyRanges = None;
// First, handle aliases that expand to multiple instructions.
MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
MCInst Inst;
// Find one unsized memory operand, if present.
X86Operand *UnsizedMemOp = nullptr;
for (const auto &Op : Operands) {
X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
// FIXME: Remove this exception for absolute memory references. Currently it
// allows us to assemble 'call foo', because foo is represented as a memory
// operand.
if (X86Op->isMemUnsized() && !X86Op->isAbsMem())
UnsizedMemOp = X86Op;
}
// Allow some instructions to have implicitly pointer-sized operands. This is
// compatible with gas.
if (UnsizedMemOp) {
static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
for (const char *Instr : PtrSizedInstrs) {
if (Mnemonic == Instr) {
UnsizedMemOp->Mem.Size = getPointerSize();
break;
}
}
}
// If an unsized memory operand is present, try to match with each memory
// operand size. In Intel assembly, the size is not part of the instruction
// mnemonic.
SmallVector<unsigned, 8> Match;
uint64_t ErrorInfoMissingFeature = 0;
if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
static const unsigned MopSizes[] = {8, 16, 32, 64, 80};
for (unsigned Size : MopSizes) {
UnsizedMemOp->Mem.Size = Size;
uint64_t ErrorInfoIgnore;
Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
MatchingInlineAsm,
isParsingIntelSyntax()));
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
}
} else {
Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm,
isParsingIntelSyntax()));
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfo;
}
// Restore the size of the unsized memory operand if we modified it.
if (UnsizedMemOp)
UnsizedMemOp->Mem.Size = 0;
// If it's a bad mnemonic, all results will be the same.
if (Match.back() == Match_MnemonicFail) {
ArrayRef<SMRange> Ranges =
MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
Ranges, MatchingInlineAsm);
}
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
unsigned NumSuccessfulMatches =
std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the individual
// transformations can chain off each other.
if (!MatchingInlineAsm)
while (processInstruction(Inst, Operands))
;
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
} else if (NumSuccessfulMatches > 1) {
assert(UnsizedMemOp &&
"multiple matches only possible with unsized memory operands");
ArrayRef<SMRange> Ranges =
MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
return Error(UnsizedMemOp->getStartLoc(),
"ambiguous operand size for instruction '" + Mnemonic + "\'",
Ranges, MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
// missing feature.
if (std::count(std::begin(Match), std::end(Match),
Match_MissingFeature) == 1) {
ErrorInfo = ErrorInfoMissingFeature;
return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
// operand failure.
if (std::count(std::begin(Match), std::end(Match),
Match_InvalidOperand) == 1) {
return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
MatchingInlineAsm);
}
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}

View File

@ -205,6 +205,9 @@ struct X86Operand : public MCParsedAsmOperand {
}
bool isMem() const override { return Kind == Memory; }
bool isMemUnsized() const {
return Kind == Memory && Mem.Size == 0;
}
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
}

View File

@ -0,0 +1,44 @@
// RUN: not llvm-mc -triple i686-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
.intel_syntax
// Basic case of ambiguity for inc.
inc [eax]
// CHECK: error: ambiguous operand size for instruction 'inc'
inc dword ptr [eax]
inc word ptr [eax]
inc byte ptr [eax]
// CHECK-NOT: error:
// Other ambiguous instructions. Anything that doesn't take a register,
// basically.
dec [eax]
// CHECK: error: ambiguous operand size for instruction 'dec'
mov [eax], 1
// CHECK: error: ambiguous operand size for instruction 'mov'
and [eax], 0
// CHECK: error: ambiguous operand size for instruction 'and'
or [eax], 1
// CHECK: error: ambiguous operand size for instruction 'or'
add [eax], 1
// CHECK: error: ambiguous operand size for instruction 'add'
sub [eax], 1
// CHECK: error: ambiguous operand size for instruction 'sub'
// gas assumes these instructions are pointer-sized by default, and we follow
// suit.
push [eax]
call [eax]
jmp [eax]
// CHECK-NOT: error:
add byte ptr [eax], eax
// CHECK: error: invalid operand for instruction
add byte ptr [eax], eax
// CHECK: error: invalid operand for instruction
add rax, 3
// CHECK: error: register %rax is only available in 64-bit mode

View File

@ -0,0 +1,20 @@
// RUN: llvm-mc %s -triple=i686-pc-windows | FileCheck %s
.intel_syntax
push [eax]
// CHECK: pushl (%eax)
call [eax]
// CHECK: calll *(%eax)
jmp [eax]
// CHECK: jmpl *(%eax)
// mode switch
.code16
push [eax]
// CHECK: pushw (%eax)
call [eax]
// CHECK: callw *(%eax)
jmp [eax]
// CHECK: jmpw *(%eax)

View File

@ -607,3 +607,37 @@ fadd "?half@?0??bar@@YAXXZ@4NA"
fadd "?half@?0??bar@@YAXXZ@4NA"@IMGREL
// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"
// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"@IMGREL32
inc qword ptr [rax]
inc dword ptr [rax]
inc word ptr [rax]
inc byte ptr [rax]
// CHECK: incq (%rax)
// CHECK: incl (%rax)
// CHECK: incw (%rax)
// CHECK: incb (%rax)
dec qword ptr [rax]
dec dword ptr [rax]
dec word ptr [rax]
dec byte ptr [rax]
// CHECK: decq (%rax)
// CHECK: decl (%rax)
// CHECK: decw (%rax)
// CHECK: decb (%rax)
add qword ptr [rax], 1
add dword ptr [rax], 1
add word ptr [rax], 1
add byte ptr [rax], 1
// CHECK: addq $1, (%rax)
// CHECK: addl $1, (%rax)
// CHECK: addw $1, (%rax)
// CHECK: addb $1, (%rax)
fstp xword ptr [rax]
fstp qword ptr [rax]
fstp dword ptr [rax]
// CHECK: fstpt (%rax)
// CHECK: fstpl (%rax)
// CHECK: fstps (%rax)