From 6ddd8c28b787b50a37df84890563d46dbfc3a81c Mon Sep 17 00:00:00 2001 From: Anirudh Prasad Date: Tue, 13 Apr 2021 15:25:00 -0400 Subject: [PATCH] [AsmParser][SystemZ][z/OS] Add support to AsmLexer to accept HLASM style integers - Add support for HLASM style integers. These are the decimal integers [0-9]. - HLASM does not support the additional prefixed integers like `0b`, `0x`, octal integers and Masm style integers. - To achieve this, a field `LexHLASMIntegers` (similar to the `LexMasmIntegers` field) is introduced in `MCAsmLexer.h` as well as a corresponding setter. Note: This field could also go into MCAsmInfo.h. I used the previous precedent set by the `LexMasmIntegers` field. Depends on https://reviews.llvm.org/D99286 Reviewed By: epastor Differential Revision: https://reviews.llvm.org/D99374 --- llvm/include/llvm/MC/MCParser/MCAsmLexer.h | 4 + llvm/lib/MC/MCParser/AsmLexer.cpp | 30 ++++--- .../MC/SystemZ/SystemZAsmLexerTest.cpp | 87 +++++++++++++++++++ 3 files changed, 109 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h index bbc890c5644a..6a604014a837 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h @@ -56,6 +56,7 @@ protected: // Can only create subclasses. bool LexMotorolaIntegers = false; bool UseMasmDefaultRadix = false; unsigned DefaultRadix = 10; + bool LexHLASMIntegers = false; AsmCommentConsumer *CommentConsumer = nullptr; MCAsmLexer(); @@ -176,6 +177,9 @@ public: /// Set whether to lex Motorola-style integer literals, such as $deadbeef or /// %01010110. void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; } + + /// Set whether to lex HLASM-flavour integers. 
For now this is only [0-9]* + void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; } }; } // end namespace llvm diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index ab105c610057..5fe3be42c801 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -456,26 +456,32 @@ AsmToken AsmLexer::LexDigit() { } // Decimal integer: [1-9][0-9]* - if (CurPtr[-1] != '0' || CurPtr[0] == '.') { + // HLASM-flavour decimal integer: [0-9][0-9]* + // FIXME: Later on, support for fb for HLASM has to be added in + // as they probably would be needed for asm goto + if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') { unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers); - bool isHex = Radix == 16; - // Check for floating point literals. - if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) { - if (*CurPtr == '.') - ++CurPtr; - return LexFloatLiteral(); + + if (!LexHLASMIntegers) { + bool IsHex = Radix == 16; + // Check for floating point literals. + if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) { + if (*CurPtr == '.') + ++CurPtr; + return LexFloatLiteral(); + } } StringRef Result(TokStart, CurPtr - TokStart); APInt Value(128, 0, true); - if (Result.getAsInteger(Radix, Value)) { + if (Result.getAsInteger(Radix, Value)) return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); - } - // The darwin/x86 (and x86-64) assembler accepts and ignores type - // suffices on integer literals. - SkipIgnoredIntegerSuffix(CurPtr); + if (!LexHLASMIntegers) + // The darwin/x86 (and x86-64) assembler accepts and ignores type + // suffices on integer literals. 
+      SkipIgnoredIntegerSuffix(CurPtr);
 
     return intToken(Result, Value);
   }
diff --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
index a1253eaff43d..d7e90f1b9a24 100644
--- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -109,6 +109,21 @@ protected:
       Lexer.Lex();
     }
   }
+
+  void lexAndCheckIntegerTokensAndValues(StringRef AsmStr,
+                                         SmallVector<int64_t> ExpectedValues) {
+    // Get reference to AsmLexer.
+    MCAsmLexer &Lexer = Parser->getLexer();
+    // Each expected value must match the next Integer token, in order.
+    for (size_t I = 0; I < ExpectedValues.size(); ++I) {
+      // Advance past statement separators so no expected value is skipped.
+      while (Lexer.getTok().getKind() == AsmToken::EndOfStatement)
+        Lexer.Lex();
+      ASSERT_EQ(Lexer.getTok().getKind(), AsmToken::Integer);
+      EXPECT_EQ(Lexer.getTok().getIntVal(), ExpectedValues[I]);
+      Lexer.Lex();
+    }
+  }
 };
 
 TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
@@ -367,4 +382,76 @@ TEST_F(SystemZAsmLexerTest, CheckStrictCommentString5) {
 
   lexAndCheckTokens(AsmStr, ExpectedTokens);
 }
+
+TEST_F(SystemZAsmLexerTest, CheckValidHLASMIntegers) {
+  StringRef AsmStr = "123\n000123\n1999\n007\n12300\n12021\n";
+  // StringRef AsmStr = "123";
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+  Parser->getLexer().setLexHLASMIntegers(true);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  // SmallVector<int64_t> ExpectedValues({123});
+  SmallVector<int64_t> ExpectedValues({123, 123, 1999, 7, 12300, 12021});
+  lexAndCheckIntegerTokensAndValues(AsmStr, ExpectedValues);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckInvalidHLASMIntegers) {
+  StringRef AsmStr = "0b0101\n0xDEADBEEF\nfffh\n.133\n";
+
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+  Parser->getLexer().setLexHLASMIntegers(true);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(AsmToken::Integer);        // "0"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "b0101"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Integer);        // "0"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "xDEADBEEF"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "fffh"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Real);           // ".133"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Eof);
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckDefaultIntegers) {
+  StringRef AsmStr = "0b0101\n0xDEADBEEF\n";
+
+  // Setup. "fffh" is left out: it lexes as an Identifier, not an Integer.
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<int64_t> ExpectedValues({5, 0xDEADBEEF});
+  lexAndCheckIntegerTokensAndValues(AsmStr, ExpectedValues);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckDefaultFloats) {
+  StringRef AsmStr = "0.333\n1.3\n2.5\n3.0\n";
+
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+
+  for (int I = 0; I < 4; ++I)
+    ExpectedTokens.insert(ExpectedTokens.begin(),
+                          {AsmToken::Real, AsmToken::EndOfStatement});
+
+  ExpectedTokens.push_back(AsmToken::Eof);
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
 } // end anonymous namespace