llvm-project/mlir/lib/Parser/Token.cpp

//===- Token.cpp - MLIR Token Implementation ------------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the Token class for the MLIR textual form.
//
//===----------------------------------------------------------------------===//

#include "Token.h"
#include "llvm/ADT/StringExtras.h"
using namespace mlir;
using llvm::SMLoc;
using llvm::SMRange;

/// Return the source location of the first character of this token's
/// spelling.
SMLoc Token::getLoc() const {
  const char *firstChar = spelling.data();
  return SMLoc::getFromPointer(firstChar);
}

/// Return the source location one past the last character of this token's
/// spelling.
SMLoc Token::getEndLoc() const {
  const char *firstChar = spelling.data();
  const char *pastLastChar = firstChar + spelling.size();
  return SMLoc::getFromPointer(pastLastChar);
}

/// Return the half-open source range [getLoc(), getEndLoc()) covered by this
/// token's spelling.
SMRange Token::getLocRange() const {
  const SMLoc rangeBegin = getLoc();
  const SMLoc rangeEnd = getEndLoc();
  return SMRange(rangeBegin, rangeEnd);
}

/// Parse the spelling of an integer token into an `unsigned`.
///
/// A spelling whose second character is 'x' is treated as hexadecimal and
/// parsed with radix 0 (letting getAsInteger pick the base from the prefix);
/// every other spelling is parsed as decimal.  Returns None when the parse
/// fails, e.g. because the value does not fit in an `unsigned`.
Optional<unsigned> Token::getUnsignedIntegerValue() const {
  unsigned radix = 10;
  if (spelling.size() > 1 && spelling[1] == 'x')
    radix = 0;

  unsigned value = 0;
  // getAsInteger signals failure by returning true.
  const bool parseFailed = spelling.getAsInteger(radix, value);
  if (parseFailed)
    return None;
  return value;
}

/// Parse the spelling of an integer token into a `uint64_t`.
///
/// A spelling whose second character is 'x' is treated as hexadecimal and
/// parsed with radix 0 (letting getAsInteger pick the base from the prefix);
/// every other spelling is parsed as decimal.  Returns None when the parse
/// fails, e.g. because the value does not fit in a `uint64_t`.
Optional<uint64_t> Token::getUInt64IntegerValue() const {
  unsigned radix = 10;
  if (spelling.size() > 1 && spelling[1] == 'x')
    radix = 0;

  uint64_t value = 0;
  // getAsInteger signals failure by returning true.
  const bool parseFailed = spelling.getAsInteger(radix, value);
  if (parseFailed)
    return None;
  return value;
}

/// Parse the spelling of a floatliteral token into a `double`.  Returns None
/// when the conversion fails (the value underflows or overflows).
Optional<double> Token::getFloatingPointValue() const {
  double value = 0;
  // getAsDouble signals failure by returning true.
  const bool parseFailed = spelling.getAsDouble(value);
  if (parseFailed)
    return None;
  return value;
}

/// For an inttype token, return its bitwidth.
///
/// The width is the decimal number that follows the first character of the
/// spelling.  Returns None if that number starts with '0', cannot be parsed
/// into an `unsigned`, or is zero.
Optional<unsigned> Token::getIntTypeBitwidth() const {
  // A width with a leading zero is rejected outright.
  if (spelling[1] == '0')
    return None;

  // Everything after the first character is the width.
  StringRef widthDigits = spelling.drop_front();
  unsigned width = 0;
  if (widthDigits.getAsInteger(10, width))
    return None;

  if (width == 0)
    return None;
  return width;
}

/// Given a 'string' token, return its value, including removing the quote
/// characters and unescaping the contents of the string.  The lexer has already
/// verified that this token is valid.
std::string Token::getStringValue() const {
  assert(getKind() == string);
  // Start by dropping the quotes.
  StringRef bytes = getSpelling().drop_front().drop_back();

  std::string result;
  result.reserve(bytes.size());
  for (unsigned i = 0, e = bytes.size(); i != e;) {
    auto c = bytes[i++];
    if (c != '\\') {
      result.push_back(c);
      continue;
    }

    assert(i + 1 <= e && "invalid string should be caught by lexer");
    auto c1 = bytes[i++];
    switch (c1) {
    case '"':
    case '\\':
      result.push_back(c1);
      continue;
    case 'n':
      result.push_back('\n');
      continue;
    case 't':
      result.push_back('\t');
      continue;
    default:
      break;
    }

    assert(i + 1 <= e && "invalid string should be caught by lexer");
    auto c2 = bytes[i++];

    assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
    result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
  }

  return result;
}

/// Given a hash_identifier token like #123, parse the decimal number that
/// follows the first character.  Returns None if the remainder is not a
/// number (a named identifier like #x) or if the integer doesn't fit in an
/// `unsigned`.
Optional<unsigned> Token::getHashIdentifierNumber() const {
  assert(getKind() == hash_identifier);

  // Skip the first character and parse the rest as base 10.
  StringRef digits = spelling.drop_front();
  unsigned number = 0;
  // getAsInteger signals failure by returning true.
  const bool parseFailed = digits.getAsInteger(10, number);
  if (parseFailed)
    return None;
  return number;
}

/// Given a punctuation, operator or keyword token kind, return the fixed
/// spelling of that token as a string.  Warning: This will abort on markers,
/// identifiers and literal tokens since they have no fixed spelling.
///
/// The case labels are generated by expanding the TOK_PUNCTUATION,
/// TOK_OPERATOR and TOK_KEYWORD macros over the entries of TokenKinds.def.
StringRef Token::getTokenSpelling(Kind kind) {
  switch (kind) {
#define TOK_PUNCTUATION(NAME, SPELLING) case NAME: return SPELLING;
#define TOK_OPERATOR(NAME, SPELLING) case NAME: return SPELLING;
#define TOK_KEYWORD(SPELLING) case kw_##SPELLING: return #SPELLING;
#include "TokenKinds.def"
  // Every remaining kind has no fixed spelling.
  default:
    llvm_unreachable("This token kind has no fixed spelling");
  }
}

/// Return true if this is one of the keyword token kinds (e.g. kw_if).
///
/// Each TOK_KEYWORD entry of TokenKinds.def expands to a bare case label; all
/// of those labels share the single `return true` that follows the include.
bool Token::isKeyword() const {
  switch (kind) {
#define TOK_KEYWORD(SPELLING) case kw_##SPELLING:
#include "TokenKinds.def"
    return true;
  default:
    return false;
  }
}
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`//===- Token.cpp - MLIR Token Implementation ------------------------------===//`
			`//`
			`// Copyright 2019 The MLIR Authors.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`// =============================================================================`
			`//`
			`// This file implements the Token class for the MLIR textual form.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "Token.h"`
Escape and unescape strings in the parser and printer so they can roundtrip, print floating point in a structured form that we know can round trip, enumerate attributes in the visitor so we print affine mapping attributes symbolically (the majority of the testcase updates). We still have an issue where the hexadecimal floating point syntax is reparsed as an integer, but that can evolve in subsequent patches. PiperOrigin-RevId: 208828876 2018-08-16 00:09:54 +08:00			`#include "llvm/ADT/StringExtras.h"`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`using namespace mlir;`
			`using llvm::SMLoc;`
			`using llvm::SMRange;`

Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
			`SMLoc Token::getEndLoc() const {`
			`return SMLoc::getFromPointer(spelling.data() + spelling.size());`
			`}`

Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }`
Implement parser and lexer support for most of the type grammar. Semi-affine maps and address spaces are not yet supported (someone want to take this on?). We also don't generate IR objects for types yet, which I plan to tackle next. PiperOrigin-RevId: 201754283 2018-06-23 06:52:02 +08:00
			`/// For an integer token, return its value as an unsigned. If it doesn't fit,`
			`/// return None.`
Sketch out parser/IR support for OperationInst, and a new Instruction base class. Introduce an Identifier class to MLIRContext to represent uniqued identifiers, introduce string literal support to the lexer, introducing parser and printer support etc. PiperOrigin-RevId: 202592007 2018-06-29 11:45:33 +08:00			`Optional<unsigned> Token::getUnsignedIntegerValue() const {`
Implement parser and lexer support for most of the type grammar. Semi-affine maps and address spaces are not yet supported (someone want to take this on?). We also don't generate IR objects for types yet, which I plan to tackle next. PiperOrigin-RevId: 201754283 2018-06-23 06:52:02 +08:00			`bool isHex = spelling.size() > 1 && spelling[1] == 'x';`

			`unsigned result = 0;`
			`if (spelling.getAsInteger(isHex ? 0 : 10, result))`
			`return None;`
			`return result;`
			`}`
Sketch out parser/IR support for OperationInst, and a new Instruction base class. Introduce an Identifier class to MLIRContext to represent uniqued identifiers, introduce string literal support to the lexer, introducing parser and printer support etc. PiperOrigin-RevId: 202592007 2018-06-29 11:45:33 +08:00
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00			`/// For an integer token, return its value as a uint64_t. If it doesn't fit,`
			`/// return None.`
			`Optional<uint64_t> Token::getUInt64IntegerValue() const {`
			`bool isHex = spelling.size() > 1 && spelling[1] == 'x';`

			`uint64_t result = 0;`
			`if (spelling.getAsInteger(isHex ? 0 : 10, result))`
			`return None;`
			`return result;`
			`}`

Add parsing for floating point attributes. This is doing it in a suboptimal manner by recombining [integer period literal] into a string literal and parsing that via to_float. PiperOrigin-RevId: 206855106 2018-08-01 08:15:15 +08:00			`/// For a floatliteral, return its value as a double. Return None if the value`
			`/// underflows or overflows.`
			`Optional<double> Token::getFloatingPointValue() const {`
			`double result = 0;`
			`if (spelling.getAsDouble(result))`
			`return None;`
			`return result;`
			`}`
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00
Enhance the type system to support arbitrary precision integers, which are important for low-bitwidth inference cases and hardware synthesis targets. Rename 'int' to 'affineint' to avoid confusion between "the integers" and "the int type". PiperOrigin-RevId: 202751508 2018-06-30 13:08:05 +08:00			`/// For an inttype token, return its bitwidth.`
			`Optional<unsigned> Token::getIntTypeBitwidth() const {`
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00			`unsigned result = 0;`
Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`if (spelling[1] == '0' \|\| spelling.drop_front().getAsInteger(10, result) \|\|`
			`result == 0)`
Enhance the type system to support arbitrary precision integers, which are important for low-bitwidth inference cases and hardware synthesis targets. Rename 'int' to 'affineint' to avoid confusion between "the integers" and "the int type". PiperOrigin-RevId: 202751508 2018-06-30 13:08:05 +08:00			`return None;`
			`return result;`
			`}`

Sketch out parser/IR support for OperationInst, and a new Instruction base class. Introduce an Identifier class to MLIRContext to represent uniqued identifiers, introduce string literal support to the lexer, introducing parser and printer support etc. PiperOrigin-RevId: 202592007 2018-06-29 11:45:33 +08:00			`/// Given a 'string' token, return its value, including removing the quote`
Escape and unescape strings in the parser and printer so they can roundtrip, print floating point in a structured form that we know can round trip, enumerate attributes in the visitor so we print affine mapping attributes symbolically (the majority of the testcase updates). We still have an issue where the hexadecimal floating point syntax is reparsed as an integer, but that can evolve in subsequent patches. PiperOrigin-RevId: 208828876 2018-08-16 00:09:54 +08:00			`/// characters and unescaping the contents of the string. The lexer has already`
			`/// verified that this token is valid.`
Sketch out parser/IR support for OperationInst, and a new Instruction base class. Introduce an Identifier class to MLIRContext to represent uniqued identifiers, introduce string literal support to the lexer, introducing parser and printer support etc. PiperOrigin-RevId: 202592007 2018-06-29 11:45:33 +08:00			`std::string Token::getStringValue() const {`
Escape and unescape strings in the parser and printer so they can roundtrip, print floating point in a structured form that we know can round trip, enumerate attributes in the visitor so we print affine mapping attributes symbolically (the majority of the testcase updates). We still have an issue where the hexadecimal floating point syntax is reparsed as an integer, but that can evolve in subsequent patches. PiperOrigin-RevId: 208828876 2018-08-16 00:09:54 +08:00			`assert(getKind() == string);`
[mlir] Allow C-style escapes in Lexer This patch passes the raw, unescaped value through to the rest of the stack. Partial escaping is a total pain to deal with, so we either need to implement escaping properly (ideally using a third party library like absl, I don't think LLVM has one that can handle the proper gamut of escape codes) or don't escape. I chose the latter for this patch. PiperOrigin-RevId: 208608945 2018-08-14 16:16:45 +08:00			`// Start by dropping the quotes.`
Escape and unescape strings in the parser and printer so they can roundtrip, print floating point in a structured form that we know can round trip, enumerate attributes in the visitor so we print affine mapping attributes symbolically (the majority of the testcase updates). We still have an issue where the hexadecimal floating point syntax is reparsed as an integer, but that can evolve in subsequent patches. PiperOrigin-RevId: 208828876 2018-08-16 00:09:54 +08:00			`StringRef bytes = getSpelling().drop_front().drop_back();`

			`std::string result;`
			`result.reserve(bytes.size());`
			`for (unsigned i = 0, e = bytes.size(); i != e;) {`
			`auto c = bytes[i++];`
			`if (c != '\\') {`
			`result.push_back(c);`
			`continue;`
			`}`

Fix an invalid assert when processing escaped strings. The assert assumed that the escaped character could not appear at the end of the string. Fixes tensorflow/mlir#117 PiperOrigin-RevId: 266975471 2019-09-04 02:27:00 +08:00			`assert(i + 1 <= e && "invalid string should be caught by lexer");`
Escape and unescape strings in the parser and printer so they can roundtrip, print floating point in a structured form that we know can round trip, enumerate attributes in the visitor so we print affine mapping attributes symbolically (the majority of the testcase updates). We still have an issue where the hexadecimal floating point syntax is reparsed as an integer, but that can evolve in subsequent patches. PiperOrigin-RevId: 208828876 2018-08-16 00:09:54 +08:00			`auto c1 = bytes[i++];`
			`switch (c1) {`
			`case '"':`
			`case '\\':`
			`result.push_back(c1);`
			`continue;`
			`case 'n':`
			`result.push_back('\n');`
			`continue;`
			`case 't':`
			`result.push_back('\t');`
			`continue;`
			`default:`
			`break;`
			`}`

[mlir] Fix tests after Chris implemented string escaping in MLIR We don't need to C-escape any more, so don't. Also, change the expected escaping syntax to be the slightly noisier version that LLVM emits. PiperOrigin-RevId: 208989483 2018-08-16 23:43:55 +08:00			`assert(i + 1 <= e && "invalid string should be caught by lexer");`
Escape and unescape strings in the parser and printer so they can roundtrip, print floating point in a structured form that we know can round trip, enumerate attributes in the visitor so we print affine mapping attributes symbolically (the majority of the testcase updates). We still have an issue where the hexadecimal floating point syntax is reparsed as an integer, but that can evolve in subsequent patches. PiperOrigin-RevId: 208828876 2018-08-16 00:09:54 +08:00			`auto c2 = bytes[i++];`

			`assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");`
			`result.push_back((llvm::hexDigitValue(c1) << 4) \| llvm::hexDigitValue(c2));`
			`}`

			`return result;`
Sketch out parser/IR support for OperationInst, and a new Instruction base class. Introduce an Identifier class to MLIRContext to represent uniqued identifiers, introduce string literal support to the lexer, introducing parser and printer support etc. PiperOrigin-RevId: 202592007 2018-06-29 11:45:33 +08:00			`}`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00
Add support for multiple results to the printer/parser, add support for forward references to the parser, add initial support for SSA use-list iteration and RAUW. PiperOrigin-RevId: 205484031 2018-07-21 09:41:34 +08:00			`/// Given a hash_identifier token like #123, try to parse the number out of`
			`/// the identifier, returning None if it is a named identifier like #x or`
			`/// if the integer doesn't fit.`
			`Optional<unsigned> Token::getHashIdentifierNumber() const {`
			`assert(getKind() == hash_identifier);`
			`unsigned result = 0;`
			`if (spelling.drop_front().getAsInteger(10, result))`
			`return None;`
			`return result;`
			`}`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00
			`/// Given a punctuation or keyword token kind, return the spelling of the`
			`/// token as a string. Warning: This will abort on markers, identifiers and`
			`/// literal tokens since they have no fixed spelling.`
			`StringRef Token::getTokenSpelling(Kind kind) {`
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00			`switch (kind) {`
Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`default:`
			`llvm_unreachable("This token kind has no fixed spelling");`
			`#define TOK_PUNCTUATION(NAME, SPELLING) \`
			`case NAME: \`
			`return SPELLING;`
			`#define TOK_OPERATOR(NAME, SPELLING) \`
			`case NAME: \`
			`return SPELLING;`
			`#define TOK_KEYWORD(SPELLING) \`
			`case kw_##SPELLING: \`
			`return #SPELLING;`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`#include "TokenKinds.def"`
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00			`}`
			`}`

			`/// Return true if this is one of the keyword token kinds (e.g. kw_if).`
			`bool Token::isKeyword() const {`
			`switch (kind) {`
Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`default:`
			`return false;`
			`#define TOK_KEYWORD(SPELLING) \`
			`case kw_##SPELLING: \`
			`return true;`
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00			`#include "TokenKinds.def"`
			`}`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`}`