[AsmParser][SystemZ][z/OS] Add support to AsmLexer to accept HLASM style integers

- Add support for HLASM-style integers. These are plain decimal integers of the form [0-9]+.
- HLASM does not support the additional prefixed integer forms such as `0b` and `0x`, nor octal integers or MASM-style integers.
- To achieve this, a field `LexHLASMIntegers` (similar to the `LexMasmIntegers` field) is introduced in `MCAsmLexer.h`, along with a corresponding setter.

Note: This field could also go into MCAsmInfo.h. I used the previous precedent set by the `LexMasmIntegers` field.

Depends on https://reviews.llvm.org/D99286

Reviewed By: epastor

Differential Revision: https://reviews.llvm.org/D99374
This commit is contained in:
Anirudh Prasad 2021-04-13 15:25:00 -04:00
parent c058a71227
commit 6ddd8c28b7
3 changed files with 109 additions and 12 deletions

View File

@ -56,6 +56,7 @@ protected: // Can only create subclasses.
bool LexMotorolaIntegers = false;
bool UseMasmDefaultRadix = false;
unsigned DefaultRadix = 10;
// Whether to lex HLASM-flavour integers (plain decimal digits only). Off by
// default; toggled through setLexHLASMIntegers().
bool LexHLASMIntegers = false;
AsmCommentConsumer *CommentConsumer = nullptr;
MCAsmLexer();
@ -176,6 +177,9 @@ public:
/// Set whether to lex Motorola-style integer literals, such as $deadbeef or
/// %01010110.
///
/// \param V true to enable Motorola-style integer lexing, false to disable it.
void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
/// Set whether to lex HLASM-flavour integers. For now this is only plain
/// decimal integers, i.e. [0-9]+.
///
/// \param V true to enable HLASM-flavour integer lexing, false to disable it.
void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
};
} // end namespace llvm

View File

@ -456,26 +456,32 @@ AsmToken AsmLexer::LexDigit() {
}
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
// HLASM-flavour decimal integer: [0-9][0-9]*
// FIXME: Later on, support for fb for HLASM has to be added in
// as they probably would be needed for asm goto
if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
bool isHex = Radix == 16;
// Check for floating point literals.
if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
if (*CurPtr == '.')
++CurPtr;
return LexFloatLiteral();
if (!LexHLASMIntegers) {
bool IsHex = Radix == 16;
// Check for floating point literals.
if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
if (*CurPtr == '.')
++CurPtr;
return LexFloatLiteral();
}
}
StringRef Result(TokStart, CurPtr - TokStart);
APInt Value(128, 0, true);
if (Result.getAsInteger(Radix, Value)) {
if (Result.getAsInteger(Radix, Value))
return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
}
// The darwin/x86 (and x86-64) assembler accepts and ignores type
// suffices on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
if (!LexHLASMIntegers)
// The darwin/x86 (and x86-64) assembler accepts and ignores type
// suffices on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
return intToken(Result, Value);
}

View File

@ -109,6 +109,21 @@ protected:
Lexer.Lex();
}
}
/// Walks the tokens produced by the parser's lexer and checks that, ignoring
/// EndOfStatement tokens, they are Integer tokens whose values match
/// \p ExpectedValues in order.
///
/// The caller must already have lexed the first token (Lexer.getTok() is read
/// before any call to Lex()).
///
/// \param AsmStr the assembly string under test; not read by this helper.
/// \param ExpectedValues the integer values expected, one per Integer token.
void lexAndCheckIntegerTokensAndValues(StringRef AsmStr,
                                       SmallVector<int64_t> ExpectedValues) {
  // Get reference to AsmLexer.
  MCAsmLexer &Lexer = Parser->getLexer();
  // Check one Integer token per expected value.
  for (int64_t ExpectedValue : ExpectedValues) {
    // Skip any EndOfStatement tokens, we're not concerned with them. The
    // lexer has to be advanced past them; merely moving on to the next
    // expected value would leave the lexer parked on the same token and
    // silently skip every remaining check.
    while (Lexer.getTok().getKind() == AsmToken::EndOfStatement)
      Lexer.Lex();
    // Bail out on a non-Integer token rather than querying its integer value.
    ASSERT_EQ(Lexer.getTok().getKind(), AsmToken::Integer);
    EXPECT_EQ(Lexer.getTok().getIntVal(), ExpectedValue);
    Lexer.Lex();
  }
}
};
TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
@ -367,4 +382,76 @@ TEST_F(SystemZAsmLexerTest, CheckStrictCommentString5) {
lexAndCheckTokens(AsmStr, ExpectedTokens);
}
// With HLASM-style integer lexing enabled, every line below is expected to lex
// as a decimal Integer token; leading zeros are expected not to change the
// value (000123 -> 123, 007 -> 7).
TEST_F(SystemZAsmLexerTest, CheckValidHLASMIntegers) {
  StringRef AsmStr = "123\n000123\n1999\n007\n12300\n12021\n";

  // Setup.
  setupCallToAsmParser(AsmStr);
  Parser->getLexer().setLexHLASMIntegers(true);

  // Lex initially to get the string.
  Parser->getLexer().Lex();

  SmallVector<int64_t> ExpectedValues({123, 123, 1999, 7, 12300, 12021});
  lexAndCheckIntegerTokensAndValues(AsmStr, ExpectedValues);
}
// With HLASM-style integer lexing enabled, prefixed and suffixed integer
// spellings are expected to split into an Integer "0" followed by an
// Identifier (or to lex as a plain Identifier), while ".133" is still expected
// to lex as a Real.
TEST_F(SystemZAsmLexerTest, CheckInvalidHLASMIntegers) {
  StringRef AsmStr = "0b0101\n0xDEADBEEF\nfffh\n.133\n";

  // Setup.
  setupCallToAsmParser(AsmStr);
  Parser->getLexer().setLexHLASMIntegers(true);

  // Lex initially to get the string.
  Parser->getLexer().Lex();

  SmallVector<AsmToken::TokenKind> ExpectedTokens({
      AsmToken::Integer,        // "0"
      AsmToken::Identifier,     // "b0101"
      AsmToken::EndOfStatement, // "\n"
      AsmToken::Integer,        // "0"
      AsmToken::Identifier,     // "xDEADBEEF"
      AsmToken::EndOfStatement, // "\n"
      AsmToken::Identifier,     // "fffh"
      AsmToken::EndOfStatement, // "\n"
      AsmToken::Real,           // ".133"
      AsmToken::EndOfStatement, // "\n"
      AsmToken::Eof,
  });
  lexAndCheckTokens(AsmStr, ExpectedTokens);
}
// Without HLASM-style integer lexing, the prefixed integer forms (binary "0b",
// hexadecimal "0x" and leading-zero octal) are expected to lex as Integer
// tokens.
//
// Note: a spelling such as "fffh" is deliberately not used here. It starts
// with a letter, so it is lexed as an Identifier rather than an Integer and
// cannot be given an expected integer value.
TEST_F(SystemZAsmLexerTest, CheckDefaultIntegers) {
  StringRef AsmStr = "0b0101\n0xDEADBEEF\n0777\n";

  // Setup.
  setupCallToAsmParser(AsmStr);

  // Lex initially to get the string.
  Parser->getLexer().Lex();

  SmallVector<int64_t> ExpectedValues({5, 0xDEADBEEF, 0777});
  lexAndCheckIntegerTokensAndValues(AsmStr, ExpectedValues);
}
// Without HLASM-style integer lexing, each of the four lines is expected to
// lex as a Real token followed by an EndOfStatement token.
TEST_F(SystemZAsmLexerTest, CheckDefaultFloats) {
  StringRef AsmStr = "0.333\n1.3\n2.5\n3.0\n";

  // Setup.
  setupCallToAsmParser(AsmStr);

  // Lex initially to get the string.
  Parser->getLexer().Lex();

  // One (Real, EndOfStatement) pair per input line, then the final Eof.
  SmallVector<AsmToken::TokenKind> ExpectedTokens;
  for (int Line = 0; Line < 4; ++Line) {
    ExpectedTokens.push_back(AsmToken::Real);
    ExpectedTokens.push_back(AsmToken::EndOfStatement);
  }
  ExpectedTokens.push_back(AsmToken::Eof);

  lexAndCheckTokens(AsmStr, ExpectedTokens);
}
} // end anonymous namespace