Lexer: NFC: sort helper methods alphabetically

Lexer methods were added progressively as implementation advanced. The rest of
MLIR now tends to sort methods alphabetically for better discoverability in
absence of tooling.  Sort the lexer methods as well.

PiperOrigin-RevId: 262406992
This commit is contained in:
Alex Zinenko 2019-08-08 12:18:05 -07:00 committed by A. Unique TensorFlower
parent 44d8637af1
commit 466b9f100c
2 changed files with 100 additions and 100 deletions

View File

@ -174,33 +174,21 @@ Token Lexer::lexToken() {
}
}
/// Lex a comment line, starting with a semicolon.
/// Lex an '@foo' identifier.
///
/// TODO: add a regex for comments here and to the spec.
/// symbol-ref-id ::= `@` bare-id
///
Token Lexer::lexComment() {
// Advance over the second '/' in a '//' comment.
assert(*curPtr == '/');
++curPtr;
Token Lexer::lexAtIdentifier(const char *tokStart) {
// These always start with a letter or underscore.
auto cur = *curPtr++;
if (!isalpha(cur) && cur != '_')
return emitError(curPtr - 1,
"@ identifier expected to start with letter or '_'");
while (true) {
switch (*curPtr++) {
case '\n':
case '\r':
// Newline is end of comment.
return lexToken();
case 0:
// If this is the end of the buffer, end the comment.
if (curPtr - 1 == curBuffer.end()) {
--curPtr;
return lexToken();
}
LLVM_FALLTHROUGH;
default:
// Skip over other characters.
break;
}
}
while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr == '_' ||
*curPtr == '$' || *curPtr == '.')
++curPtr;
return formToken(Token::at_identifier, tokStart);
}
/// Lex a bare identifier or keyword that starts with a letter.
@ -234,21 +222,93 @@ Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
return Token(kind, spelling);
}
/// Lex an '@foo' identifier.
/// Lex a comment line, starting with a semicolon.
///
/// symbol-ref-id ::= `@` bare-id
/// TODO: add a regex for comments here and to the spec.
///
Token Lexer::lexAtIdentifier(const char *tokStart) {
// These always start with a letter or underscore.
auto cur = *curPtr++;
if (!isalpha(cur) && cur != '_')
return emitError(curPtr - 1,
"@ identifier expected to start with letter or '_'");
Token Lexer::lexComment() {
// Advance over the second '/' in a '//' comment.
assert(*curPtr == '/');
++curPtr;
while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr == '_' ||
*curPtr == '$' || *curPtr == '.')
while (true) {
switch (*curPtr++) {
case '\n':
case '\r':
// Newline is end of comment.
return lexToken();
case 0:
// If this is the end of the buffer, end the comment.
if (curPtr - 1 == curBuffer.end()) {
--curPtr;
return lexToken();
}
LLVM_FALLTHROUGH;
default:
// Skip over other characters.
break;
}
}
}
/// Lex an ellipsis.
///
/// ellipsis ::= '...'
///
Token Lexer::lexEllipsis(const char *tokStart) {
assert(curPtr[-1] == '.');
if (curPtr == curBuffer.end() || *curPtr != '.' || *(curPtr + 1) != '.')
return emitError(curPtr, "expected three consecutive dots for an ellipsis");
curPtr += 2;
return formToken(Token::ellipsis, tokStart);
}
/// Lex a number literal.
///
/// integer-literal ::= digit+ | `0x` hex_digit+
/// float-literal ::= [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
///
Token Lexer::lexNumber(const char *tokStart) {
assert(isdigit(curPtr[-1]));
// Handle the hexadecimal case.
if (curPtr[-1] == '0' && *curPtr == 'x') {
// If we see stuff like 0xi32, this is a literal `0` follwed by an
// identifier `xi32`, stop after `0`.
if (!isxdigit(curPtr[1]))
return formToken(Token::integer, tokStart);
curPtr += 2;
while (isxdigit(*curPtr))
++curPtr;
return formToken(Token::integer, tokStart);
}
// Handle the normal decimal case.
while (isdigit(*curPtr))
++curPtr;
return formToken(Token::at_identifier, tokStart);
if (*curPtr != '.')
return formToken(Token::integer, tokStart);
++curPtr;
// Skip over [0-9]*([eE][-+]?[0-9]+)?
while (isdigit(*curPtr))
++curPtr;
if (*curPtr == 'e' || *curPtr == 'E') {
if (isdigit(static_cast<unsigned char>(curPtr[1])) ||
((curPtr[1] == '-' || curPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(curPtr[2])))) {
curPtr += 2;
while (isdigit(*curPtr))
++curPtr;
}
}
return formToken(Token::floatliteral, tokStart);
}
/// Lex an identifier that starts with a prefix followed by suffix-id.
@ -300,52 +360,6 @@ Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
return formToken(kind, tokStart);
}
/// Lex a number literal.
///
/// integer-literal ::= digit+ | `0x` hex_digit+
/// float-literal ::= [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
///
Token Lexer::lexNumber(const char *tokStart) {
assert(isdigit(curPtr[-1]));
// Handle the hexadecimal case.
if (curPtr[-1] == '0' && *curPtr == 'x') {
// If we see stuff like 0xi32, this is a literal `0` follwed by an
// identifier `xi32`, stop after `0`.
if (!isxdigit(curPtr[1]))
return formToken(Token::integer, tokStart);
curPtr += 2;
while (isxdigit(*curPtr))
++curPtr;
return formToken(Token::integer, tokStart);
}
// Handle the normal decimal case.
while (isdigit(*curPtr))
++curPtr;
if (*curPtr != '.')
return formToken(Token::integer, tokStart);
++curPtr;
// Skip over [0-9]*([eE][-+]?[0-9]+)?
while (isdigit(*curPtr))
++curPtr;
if (*curPtr == 'e' || *curPtr == 'E') {
if (isdigit(static_cast<unsigned char>(curPtr[1])) ||
((curPtr[1] == '-' || curPtr[1] == '+') &&
isdigit(static_cast<unsigned char>(curPtr[2])))) {
curPtr += 2;
while (isdigit(*curPtr))
++curPtr;
}
}
return formToken(Token::floatliteral, tokStart);
}
/// Lex a string literal.
///
/// string-literal ::= '"' [^"\n\f\v\r]* '"'
@ -384,17 +398,3 @@ Token Lexer::lexString(const char *tokStart) {
}
}
}
/// Lex an ellipsis.
///
/// ellipsis ::= '...'
///
Token Lexer::lexEllipsis(const char *tokStart) {
assert(curPtr[-1] == '.');
if (curPtr == curBuffer.end() || *curPtr != '.' || *(curPtr + 1) != '.')
return emitError(curPtr, "expected three consecutive dots for an ellipsis");
curPtr += 2;
return formToken(Token::ellipsis, tokStart);
}

View File

@ -54,13 +54,13 @@ private:
Token emitError(const char *loc, const Twine &message);
// Lexer implementation methods.
Token lexComment();
Token lexBareIdentifierOrKeyword(const char *tokStart);
Token lexAtIdentifier(const char *tokStart);
Token lexPrefixedIdentifier(const char *tokStart);
Token lexNumber(const char *tokStart);
Token lexString(const char *tokStart);
Token lexBareIdentifierOrKeyword(const char *tokStart);
Token lexComment();
Token lexEllipsis(const char *tokStart);
Token lexNumber(const char *tokStart);
Token lexPrefixedIdentifier(const char *tokStart);
Token lexString(const char *tokStart);
const llvm::SourceMgr &sourceMgr;
MLIRContext *context;