forked from OSchip/llvm-project
Integrated assembler incorrectly lexes ARM-style comments
The integrated assembler fails to properly lex ARM comments when they are adjacent to an identifier in the input stream. The reason is that the ARM comment symbol '@' is also used as a symbol variant marker in other assembly languages, so when lexing an identifier the lexer allows the '@' symbol as part of the identifier. Example: $ cat comment.s foo: add r0, r0@got to parse this as a comment $ llvm-mc -triple armv7 comment.s comment.s:4:18: error: unexpected token in argument list add r0, r0@got to parse this as a comment ^ This should be parsed correctly as `add r0, r0`. This commit modifies the assembly lexer to not include the '@' symbol in identifiers when lexing for targets that use '@' for comments. llvm-svn: 196607
This commit is contained in:
parent
3e325d7490
commit
2cdc56d26b
|
@ -139,20 +139,23 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
|
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
|
||||||
static bool IsIdentifierChar(char c) {
|
static bool IsIdentifierChar(char c, bool AllowAt) {
|
||||||
return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?';
|
return isalnum(c) || c == '_' || c == '$' || c == '.' ||
|
||||||
|
(c == '@' && AllowAt) || c == '?';
|
||||||
}
|
}
|
||||||
AsmToken AsmLexer::LexIdentifier() {
|
AsmToken AsmLexer::LexIdentifier() {
|
||||||
|
bool AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
|
||||||
// Check for floating point literals.
|
// Check for floating point literals.
|
||||||
if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
|
if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
|
||||||
// Disambiguate a .1243foo identifier from a floating literal.
|
// Disambiguate a .1243foo identifier from a floating literal.
|
||||||
while (isdigit(*CurPtr))
|
while (isdigit(*CurPtr))
|
||||||
++CurPtr;
|
++CurPtr;
|
||||||
if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
|
if (*CurPtr == 'e' || *CurPtr == 'E' ||
|
||||||
|
!IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
|
||||||
return LexFloatLiteral();
|
return LexFloatLiteral();
|
||||||
}
|
}
|
||||||
|
|
||||||
while (IsIdentifierChar(*CurPtr))
|
while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
|
||||||
++CurPtr;
|
++CurPtr;
|
||||||
|
|
||||||
// Handle . as a special case.
|
// Handle . as a special case.
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
@ Tests to check that '@' does not get lexed as an identifier for arm
|
||||||
|
@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi | FileCheck %s
|
||||||
|
@ RUN: llvm-mc %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s --check-prefix=ERROR
|
||||||
|
|
||||||
|
foo:
|
||||||
|
bl boo@plt should be ignored
|
||||||
|
bl goo@plt
|
||||||
|
.long bar@got to parse this as a comment
|
||||||
|
.long baz@got
|
||||||
|
add r0, r0@ignore this extra junk
|
||||||
|
|
||||||
|
@CHECK-LABEL: foo:
|
||||||
|
@CHECK: bl boo
|
||||||
|
@CHECK-NOT: @
|
||||||
|
@CHECK: bl goo
|
||||||
|
@CHECK-NOT: @
|
||||||
|
@CHECK: .long bar
|
||||||
|
@CHECK-NOT: @
|
||||||
|
@CHECK: .long baz
|
||||||
|
@CHECK-NOT: @
|
||||||
|
@CHECK: add r0, r0
|
||||||
|
@CHECK-NOT: @
|
||||||
|
|
||||||
|
@ERROR-NOT: error:
|
Loading…
Reference in New Issue