[ms] [llvm-ml] Add initial MASM STRUCT/UNION support

Summary:
Add support for user-defined types to MasmParser, including initialization and field access.

Known issues:
- Omitted entry initializers (e.g., <,0>) do not work consistently for nested structs/arrays.
- Size checking/inference for values with known types is not yet implemented.
- Some ml64.exe syntaxes for accessing STRUCT fields are not recognized.
  - `[<register>.<struct name>].<field>`
  - `[<register>[<struct name>.<field>]]`
  - `(<struct name> PTR [<register>]).<field>`
  - `[<variable>.<struct name>].<field>`
  - `(<struct name> PTR <variable>).<field>`

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D75306
This commit is contained in:
Eric Astor 2020-07-07 17:01:10 -04:00
parent 24ed3a9403
commit bc8e262afe
6 changed files with 1649 additions and 236 deletions

View File

@ -170,6 +170,11 @@ public:
virtual bool isParsingMasm() const { return false; }
/// Look up the offset of the field \p Member within \p Base, storing the
/// result in \p Offset.
///
/// Returns true on failure, matching how the callers treat the result (a true
/// return is reported as an unknown/unresolvable field). This default
/// implementation always reports failure and leaves \p Offset unmodified;
/// NOTE(review): presumably parsers that know about user-defined types (e.g.
/// the MASM parser) override it - confirm against the subclasses.
virtual bool LookUpFieldOffset(StringRef Base, StringRef Member,
unsigned &Offset) {
return true;
}
/// Parse MS-style inline assembly.
virtual bool parseMSInlineAsm(
void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,

View File

@ -334,7 +334,7 @@ protected: // Can only create subclasses.
/// SemaCallback - The Sema callback implementation. Must be set when parsing
/// ms-style inline assembly.
MCAsmParserSemaCallback *SemaCallback;
MCAsmParserSemaCallback *SemaCallback = nullptr;
/// Set of options which affects instrumentation of inline assembly.
MCTargetOptions MCOptions;

File diff suppressed because it is too large Load Diff

View File

@ -864,6 +864,8 @@ private:
return nullptr;
}
bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
SMLoc EndLoc);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
bool RestoreOnFailure);
@ -1145,6 +1147,108 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
return checkScale(Scale, ErrMsg);
}
bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
SMLoc StartLoc, SMLoc EndLoc) {
// If we encounter a %, ignore it. This code handles registers with and
// without the prefix, unprefixed registers can occur in cfi directives.
RegName.consume_front("%");
RegNo = MatchRegisterName(RegName);
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
RegNo = MatchRegisterName(RegName.lower());
// The "flags" and "mxcsr" registers cannot be referenced directly.
// Treat it as an identifier instead.
if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
(RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
RegNo = 0;
if (!is64BitMode()) {
// FIXME: This should be done using Requires<Not64BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
// checked.
// FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// REX prefix.
if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo)) {
return Error(StartLoc,
"register %" + RegName + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
}
}
// If this is "db[0-15]", match it as an alias
// for dr[0-15].
if (RegNo == 0 && RegName.startswith("db")) {
if (RegName.size() == 3) {
switch (RegName[2]) {
case '0':
RegNo = X86::DR0;
break;
case '1':
RegNo = X86::DR1;
break;
case '2':
RegNo = X86::DR2;
break;
case '3':
RegNo = X86::DR3;
break;
case '4':
RegNo = X86::DR4;
break;
case '5':
RegNo = X86::DR5;
break;
case '6':
RegNo = X86::DR6;
break;
case '7':
RegNo = X86::DR7;
break;
case '8':
RegNo = X86::DR8;
break;
case '9':
RegNo = X86::DR9;
break;
}
} else if (RegName.size() == 4 && RegName[2] == '1') {
switch (RegName[3]) {
case '0':
RegNo = X86::DR10;
break;
case '1':
RegNo = X86::DR11;
break;
case '2':
RegNo = X86::DR12;
break;
case '3':
RegNo = X86::DR13;
break;
case '4':
RegNo = X86::DR14;
break;
case '5':
RegNo = X86::DR15;
break;
}
}
}
if (RegNo == 0) {
if (isParsingIntelSyntax())
return true;
return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
}
return false;
}
bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc, bool RestoreOnFailure) {
MCAsmParser &Parser = getParser();
@ -1180,37 +1284,9 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMRange(StartLoc, EndLoc));
}
RegNo = MatchRegisterName(Tok.getString());
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
RegNo = MatchRegisterName(Tok.getString().lower());
// The "flags" and "mxcsr" registers cannot be referenced directly.
// Treat it as an identifier instead.
if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
(RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
RegNo = 0;
if (!is64BitMode()) {
// FIXME: This should be done using Requires<Not64BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
// checked.
// FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// REX prefix.
if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo)) {
StringRef RegName = Tok.getString();
OnFailure();
if (!RestoreOnFailure) {
Parser.Lex(); // Eat register name.
}
return Error(StartLoc,
"register %" + RegName + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
}
if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
OnFailure();
return true;
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@ -1259,40 +1335,6 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
EndLoc = Parser.getTok().getEndLoc();
// If this is "db[0-15]", match it as an alias
// for dr[0-15].
if (RegNo == 0 && Tok.getString().startswith("db")) {
if (Tok.getString().size() == 3) {
switch (Tok.getString()[2]) {
case '0': RegNo = X86::DR0; break;
case '1': RegNo = X86::DR1; break;
case '2': RegNo = X86::DR2; break;
case '3': RegNo = X86::DR3; break;
case '4': RegNo = X86::DR4; break;
case '5': RegNo = X86::DR5; break;
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
case '8': RegNo = X86::DR8; break;
case '9': RegNo = X86::DR9; break;
}
} else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
switch (Tok.getString()[3]) {
case '0': RegNo = X86::DR10; break;
case '1': RegNo = X86::DR11; break;
case '2': RegNo = X86::DR12; break;
case '3': RegNo = X86::DR13; break;
case '4': RegNo = X86::DR14; break;
case '5': RegNo = X86::DR15; break;
}
}
if (RegNo != 0) {
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat it.
return false;
}
}
if (RegNo == 0) {
OnFailure();
if (isParsingIntelSyntax()) return true;
@ -1590,12 +1632,41 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;
// Register
// Register, or (MASM only) <register>.<field>
unsigned Reg;
if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
if (SM.onRegister(Reg, ErrMsg))
return Error(Tok.getLoc(), ErrMsg);
break;
if (Tok.is(AsmToken::Identifier)) {
if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
if (SM.onRegister(Reg, ErrMsg))
return Error(IdentLoc, ErrMsg);
break;
}
if (Parser.isParsingMasm()) {
const std::pair<StringRef, StringRef> RegField =
Tok.getString().split('.');
const StringRef RegName = RegField.first, Field = RegField.second;
SMLoc RegEndLoc =
SMLoc::getFromPointer(RegName.data() + RegName.size());
if (!Field.empty() &&
!MatchRegisterByName(Reg, RegName, IdentLoc, RegEndLoc)) {
if (SM.onRegister(Reg, ErrMsg))
return Error(IdentLoc, ErrMsg);
SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
const std::pair<StringRef, StringRef> BaseMember = Field.split('.');
const StringRef Base = BaseMember.first, Member = BaseMember.second;
unsigned Offset;
if (Parser.LookUpFieldOffset(Base, Member, Offset))
return Error(FieldStartLoc, "unknown offset");
else if (SM.onPlus(ErrMsg))
return Error(getTok().getLoc(), ErrMsg);
else if (SM.onInteger(Offset, ErrMsg))
return Error(IdentLoc, ErrMsg);
End = consumeToken();
break;
}
}
}
// Operator synonymous ("not", "or" etc.)
bool ParseError = false;
@ -1607,37 +1678,39 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
// Symbol reference, when parsing assembly content
InlineAsmIdentifierInfo Info;
const MCExpr *Val;
if (!isParsingMSInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End)) {
return Error(Tok.getLoc(), "Unexpected identifier!");
} else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
return Error(IdentLoc, ErrMsg);
} else
if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
// MS Dot Operator expression
if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
if (ParseIntelDotOperator(SM, End))
return true;
break;
}
}
// MS InlineAsm operators (TYPE/LENGTH/SIZE)
if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
if (SM.onInteger(Val, ErrMsg))
return Error(IdentLoc, ErrMsg);
} else
if (isParsingMSInlineAsm()) {
// MS InlineAsm operators (TYPE/LENGTH/SIZE)
if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
if (SM.onInteger(Val, ErrMsg))
return Error(IdentLoc, ErrMsg);
} else
return true;
break;
}
// MS InlineAsm identifier
// Call parseIdentifier() to combine @ with the identifier behind it.
if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
return Error(IdentLoc, "expected identifier");
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
return true;
else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
return Error(IdentLoc, ErrMsg);
break;
}
// MS Dot Operator expression
if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
if (ParseIntelDotOperator(SM, End))
return true;
break;
}
// MS InlineAsm identifier
// Call parseIdentifier() to combine @ with the identifier behind it.
if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
return Error(IdentLoc, "expected identifier");
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
return true;
else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
if (getParser().parsePrimaryExpr(Val, End)) {
return Error(Tok.getLoc(), "Unexpected identifier!");
} else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
return Error(IdentLoc, ErrMsg);
}
break;
}
case AsmToken::Integer: {
@ -1856,10 +1929,14 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
APInt DotDisp;
DotDispStr.getAsInteger(10, DotDisp);
Offset = DotDisp.getZExtValue();
} else if (isParsingMSInlineAsm() && Tok.is(AsmToken::Identifier)) {
std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
Offset))
} else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
Tok.is(AsmToken::Identifier)) {
const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
const StringRef Base = BaseMember.first, Member = BaseMember.second;
if (getParser().LookUpFieldOffset(SM.getSymName(), DotDispStr, Offset) &&
getParser().LookUpFieldOffset(Base, Member, Offset) &&
(!SemaCallback ||
SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
return Error(Tok.getLoc(), "Unable to lookup field reference!");
} else
return Error(Tok.getLoc(), "Unexpected token type!");

View File

@ -0,0 +1,104 @@
# RUN: llvm-ml -filetype=asm %s | FileCheck %s
.data
BAZ STRUCT
a BYTE 1
b BYTE 2
BAZ ENDS
FOOBAR struct 2
c BYTE 3 DUP (4)
d DWORD 5
e BAZ <>
STRUCT f
g BYTE 6
h BYTE 7
ends
h BYTE "abcde"
foobar ENDS
t1 foobar <>
; CHECK: t1:
;
; BYTE 3 DUP (4), plus alignment padding
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .zero 1
;
; DWORD 5
; CHECK-NEXT: .long 5
;
; BAZ <>
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 2
;
; <BYTE 6, BYTE 7>, with internal alignment padding
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .zero 1
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .zero 1
;
; BYTE "abcde", plus alignment padding
; CHECK-NEXT: .byte 97
; CHECK-NEXT: .byte 98
; CHECK-NEXT: .byte 99
; CHECK-NEXT: .byte 100
; CHECK-NEXT: .byte 101
; CHECK-NEXT: .zero 1
t2 FOOBAR <"gh",,<10,11>,<12>,"ijk">
; CHECK: t2:
;
; BYTE "gh", padded with " ", plus alignment padding
; CHECK-NEXT: .byte 103
; CHECK-NEXT: .byte 104
; CHECK-NEXT: .byte 32
; CHECK-NEXT: .zero 1
;
; DWORD 5 (default-initialized when omitted)
; CHECK-NEXT: .long 5
;
; BAZ <10, 11>
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .byte 11
;
; <BYTE 6, BYTE 7>, with internal alignment padding
; CHECK-NEXT: .byte 12
; CHECK-NEXT: .zero 1
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .zero 1
;
; BYTE "ijk", padded with " ", plus alignment padding
; CHECK-NEXT: .byte 105
; CHECK-NEXT: .byte 106
; CHECK-NEXT: .byte 107
; CHECK-NEXT: .byte 32
; CHECK-NEXT: .byte 32
; CHECK-NEXT: .zero 1
.code
t3:
mov eax, t2.f.h
mov eax, [t2].f.h
mov eax, [t2.f.h]
mov eax, t2.FOOBAR.f.h
; CHECK: t3:
; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
t4:
mov eax, j.FOOBAR.f.h
mov eax, j.baz.b
; CHECK: t4:
; CHECK-NEXT: mov eax, dword ptr [rip + j+12]
; CHECK-NEXT: mov eax, dword ptr [rip + j+1]
END

View File

@ -0,0 +1,57 @@
# RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --dump-input=always
.data
int_test STRUCT
int_arr DWORD ?, ?
int_scalar DWORD ?
int_test ENDS
t1 int_test <<1,2,3>>
// CHECK: error: Initializer too long for field; expected at most 2 elements, got 3
t2 int_test <4>
// CHECK: error: Cannot initialize array field with scalar value
t3 int_test <,<5,6>>
// CHECK: error: Cannot initialize scalar field with array value
real_test STRUCT
real_arr REAL4 ?, ?, ?
real_scalar REAL4 ?
real_test ENDS
t4 real_test <<1.0,0.0,-1.0,-2.0>>
// CHECK: error: Initializer too long for field; expected at most 3 elements, got 4
t5 real_test <2.0>
// CHECK: error: Cannot initialize array field with scalar value
t6 real_test <,<2.0,-2.0>>
// CHECK: error: Cannot initialize scalar field with array value
inner_struct STRUCT
a BYTE ?
inner_struct ENDS
struct_test STRUCT
struct_arr inner_struct 4 DUP (?)
struct_scalar inner_struct ?
struct_test ENDS
t7 struct_test <<<>, <>, <>, <>, <>>>
// CHECK: error: Initializer too long for field; expected at most 4 elements, got 5
t8 struct_test <,<<>, <>>>
// CHECK: error: 'inner_struct' initializer initializes too many fields
t9 STRUCT 3
// CHECK: error: alignment must be a power of two; was 3
t9 ENDS
t10 STRUCT 1, X
// CHECK: error: Unrecognized qualifier for 'STRUCT' directive; expected none or NONUNIQUE
t10 ENDS
t11 STRUCT
different_struct ENDS
// CHECK: error: mismatched name in ENDS directive; expected 't11'