diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp index 917cf913f14b..e8034d82c15b 100644 --- a/mlir/lib/Parser/Lexer.cpp +++ b/mlir/lib/Parser/Lexer.cpp @@ -63,114 +63,107 @@ Token Lexer::emitError(const char *loc, const Twine &message) { } Token Lexer::lexToken() { - // Ignore whitespace. while (true) { - switch (*curPtr) { + const char *tokStart = curPtr; + switch (*curPtr++) { + default: + // Handle bare identifiers. + if (isalpha(curPtr[-1])) + return lexBareIdentifierOrKeyword(tokStart); + + // Unknown character, emit an error. + return emitError(tokStart, "unexpected character"); + case ' ': case '\t': case '\n': case '\r': - ++curPtr; + // Handle whitespace. continue; - default: - // Terminate loop on non-whitespace, including either an embedded or - // final terminating nul character that llvm::MemoryBuffer guarantees - // will be there. - break; - } - break; - } - const char *tokStart = curPtr; - switch (*curPtr++) { - default: - // Handle bare identifiers. - if (isalpha(curPtr[-1])) + case '_': + // Handle bare identifiers. return lexBareIdentifierOrKeyword(tokStart); - // Unknown character, emit an error. - return emitError(tokStart, "unexpected character"); + case 0: + // This may either be a nul character in the source file or may be the EOF + // marker that llvm::MemoryBuffer guarantees will be there. + if (curPtr - 1 == curBuffer.end()) + return formToken(Token::eof, tokStart); - case '_': - // Handle bare identifiers. - return lexBareIdentifierOrKeyword(tokStart); + LLVM_FALLTHROUGH; + case ':': + return formToken(Token::colon, tokStart); + case ',': + return formToken(Token::comma, tokStart); + case '.': + return lexEllipsis(tokStart); + case '(': + return formToken(Token::l_paren, tokStart); + case ')': + return formToken(Token::r_paren, tokStart); + case '{': + return formToken(Token::l_brace, tokStart); + case '}': + return formToken(Token::r_brace, tokStart); + case '[': + return formToken(Token::l_square, tokStart); + case ']': + return formToken(Token::r_square, tokStart); + case '<': + return formToken(Token::less, tokStart); + case '>': + return formToken(Token::greater, tokStart); + case '=': + return formToken(Token::equal, tokStart); - case 0: - // This may either be a nul character in the source file or may be the EOF - // marker that llvm::MemoryBuffer guarantees will be there. - if (curPtr - 1 == curBuffer.end()) - return formToken(Token::eof, tokStart); + case '+': + return formToken(Token::plus, tokStart); + case '*': + return formToken(Token::star, tokStart); + case '-': + if (*curPtr == '>') { + ++curPtr; + return formToken(Token::arrow, tokStart); + } + return formToken(Token::minus, tokStart); - LLVM_FALLTHROUGH; - case ':': - return formToken(Token::colon, tokStart); - case ',': - return formToken(Token::comma, tokStart); - case '.': - return lexEllipsis(tokStart); - case '(': - return formToken(Token::l_paren, tokStart); - case ')': - return formToken(Token::r_paren, tokStart); - case '{': - return formToken(Token::l_brace, tokStart); - case '}': - return formToken(Token::r_brace, tokStart); - case '[': - return formToken(Token::l_square, tokStart); - case ']': - return formToken(Token::r_square, tokStart); - case '<': - return formToken(Token::less, tokStart); - case '>': - return formToken(Token::greater, tokStart); - case '=': - return formToken(Token::equal, tokStart); + case '?': + return formToken(Token::question, tokStart); - case '+': - return formToken(Token::plus, tokStart); - case '*': - return formToken(Token::star, tokStart); - case '-': - if (*curPtr == '>') { - ++curPtr; - return formToken(Token::arrow, tokStart); + case '/': + if (*curPtr == '/') { + skipComment(); + continue; + } + return emitError(tokStart, "unexpected character"); + + case '@': + return lexAtIdentifier(tokStart); + + case '!': + LLVM_FALLTHROUGH; + case '^': + LLVM_FALLTHROUGH; + case '#': + LLVM_FALLTHROUGH; + case '%': + return lexPrefixedIdentifier(tokStart); + case '"': + return lexString(tokStart); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return lexNumber(tokStart); } - return formToken(Token::minus, tokStart); - - case '?': - return formToken(Token::question, tokStart); - - case '/': - if (*curPtr == '/') - return lexComment(); - return emitError(tokStart, "unexpected character"); - - case '@': - return lexAtIdentifier(tokStart); - - case '!': - LLVM_FALLTHROUGH; - case '^': - LLVM_FALLTHROUGH; - case '#': - LLVM_FALLTHROUGH; - case '%': - return lexPrefixedIdentifier(tokStart); - case '"': - return lexString(tokStart); - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return lexNumber(tokStart); } } @@ -231,11 +224,11 @@ Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) { return Token(kind, spelling); } -/// Lex a comment line, starting with a semicolon. +/// Skip a comment line, starting with a '//'. /// /// TODO: add a regex for comments here and to the spec. /// -Token Lexer::lexComment() { +void Lexer::skipComment() { // Advance over the second '/' in a '//' comment. assert(*curPtr == '/'); ++curPtr; @@ -245,12 +238,12 @@ Token Lexer::lexComment() { case '\n': case '\r': // Newline is end of comment. - return lexToken(); + return; case 0: // If this is the end of the buffer, end the comment. if (curPtr - 1 == curBuffer.end()) { --curPtr; - return lexToken(); + return; } LLVM_FALLTHROUGH; default: diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h index b18077114578..a7a2ac4214c5 100644 --- a/mlir/lib/Parser/Lexer.h +++ b/mlir/lib/Parser/Lexer.h @@ -59,12 +59,14 @@ private: // Lexer implementation methods. Token lexAtIdentifier(const char *tokStart); Token lexBareIdentifierOrKeyword(const char *tokStart); - Token lexComment(); Token lexEllipsis(const char *tokStart); Token lexNumber(const char *tokStart); Token lexPrefixedIdentifier(const char *tokStart); Token lexString(const char *tokStart); + /// Skip a comment line, starting with a '//'. + void skipComment(); + const llvm::SourceMgr &sourceMgr; MLIRContext *context;