llvm-project/mlir/lib/Parser/Token.h

//===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_LIB_PARSER_TOKEN_H
#define MLIR_LIB_PARSER_TOKEN_H

#include "mlir/Support/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"

namespace mlir {

/// A token in the MLIR syntax: a kind discriminator together with the text
/// that spells it.
class Token {
public:
  /// All token kinds. The enumerators are produced by expanding the entries
  /// of TokenKinds.def; keyword entries get a `kw_` prefix on their spelling.
  enum Kind {
#define TOK_MARKER(NAME) NAME,
#define TOK_IDENTIFIER(NAME) NAME,
#define TOK_LITERAL(NAME) NAME,
#define TOK_PUNCTUATION(NAME, SPELLING) NAME,
#define TOK_KEYWORD(SPELLING) kw_##SPELLING,
#include "TokenKinds.def"
  };

  /// Build a token of kind `kind` whose contents are `spelling`.
  Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}

  /// The bytes that make up this token.
  StringRef getSpelling() const { return spelling; }

  //===--------------------------------------------------------------------===//
  // Token classification.
  //===--------------------------------------------------------------------===//

  /// The kind of this token.
  Kind getKind() const { return kind; }

  /// True if this token is of kind `K`.
  bool is(Kind K) const { return K == kind; }

  /// True if this token is of kind `k1` or of kind `k2`.
  bool isAny(Kind k1, Kind k2) const { return kind == k1 || kind == k2; }

  /// True if this token is of any of the listed kinds. The first kind is
  /// tested here and the remaining ones are handed to the next overload.
  template <typename... T>
  bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
    return is(k1) || isAny(k2, k3, others...);
  }

  /// True if this token is not of kind `k`.
  bool isNot(Kind k) const { return !is(k); }

  /// True if this token is of none of the listed kinds.
  template <typename... T>
  bool isNot(Kind k1, Kind k2, T... others) const {
    if (isAny(k1, k2, others...))
      return false;
    return true;
  }

  /// True if this is one of the keyword token kinds (e.g. kw_if).
  bool isKeyword() const;

  //===--------------------------------------------------------------------===//
  // Helpers to decode specific sorts of tokens.
  //===--------------------------------------------------------------------===//

  /// Decode an integer token as an `unsigned`. Yields None if the value does
  /// not fit.
  Optional<unsigned> getUnsignedIntegerValue() const;

  /// Decode the text of an integer token as a `uint64_t`. Yields None if the
  /// value does not fit.
  static Optional<uint64_t> getUInt64IntegerValue(StringRef spelling);

  /// Same as the static overload, applied to this token's own spelling.
  Optional<uint64_t> getUInt64IntegerValue() const {
    return getUInt64IntegerValue(spelling);
  }

  /// Decode a floatliteral token as a `double`. Yields None in the case of
  /// underflow or overflow.
  Optional<double> getFloatingPointValue() const;

  /// The bitwidth of an inttype token.
  Optional<unsigned> getIntTypeBitwidth() const;

  /// The signedness semantics of an inttype token: llvm::None for no
  /// signedness semantics, true for a signed integer type, false for an
  /// unsigned integer type.
  Optional<bool> getIntTypeSignedness() const;

  /// For a hash_identifier token such as #123, try to extract the number from
  /// the identifier. Yields None for a named identifier such as #x, or when
  /// the integer does not fit.
  Optional<unsigned> getHashIdentifierNumber() const;

  /// For a string literal token, the literal's value with the quote
  /// characters removed and the contents unescaped.
  std::string getStringValue() const;

  /// For a hex string literal token, the literal's value, or None if the
  /// token does not contain a valid hex string. A hex string literal is a
  /// string starting with `0x` and only containing hex digits.
  Optional<std::string> getHexStringValue() const;

  /// For a symbol reference token, the unescaped string value.
  std::string getSymbolReference() const;

  //===--------------------------------------------------------------------===//
  // Location processing.
  //===--------------------------------------------------------------------===//

  llvm::SMLoc getLoc() const;
  llvm::SMLoc getEndLoc() const;
  llvm::SMRange getLocRange() const;

  /// The spelling of a punctuation or keyword token kind, as a string.
  /// Warning: This will abort on markers, identifiers and literal tokens
  /// since they have no fixed spelling.
  static StringRef getTokenSpelling(Kind kind);

private:
  /// Discriminator recording which sort of token this is.
  Kind kind;

  /// The entire token contents. This always points into a memory buffer
  /// owned by the source manager.
  StringRef spelling;
};

} // end namespace mlir

#endif // MLIR_LIB_PARSER_TOKEN_H
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`//===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===//`
			`//`
Mass update the MLIR license header to mention "Part of the LLVM project" This is an artifact from merging MLIR into LLVM, the file headers are now aligned with the rest of the project. 2020-01-26 11:58:30 +08:00			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
Adjust License.txt file to use the LLVM license PiperOrigin-RevId: 286906740 2019-12-24 01:35:36 +08:00			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`//`
Adjust License.txt file to use the LLVM license PiperOrigin-RevId: 286906740 2019-12-24 01:35:36 +08:00			`//===----------------------------------------------------------------------===//`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
			`#ifndef MLIR_LIB_PARSER_TOKEN_H`
			`#define MLIR_LIB_PARSER_TOKEN_H`

			`#include "mlir/Support/LLVM.h"`
			`#include "llvm/ADT/StringRef.h"`
			`#include "llvm/Support/SMLoc.h"`

			`namespace mlir {`

			`/// This represents a token in the MLIR syntax.`
			`class Token {`
			`public:`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`enum Kind {`
			`#define TOK_MARKER(NAME) NAME,`
			`#define TOK_IDENTIFIER(NAME) NAME,`
			`#define TOK_LITERAL(NAME) NAME,`
			`#define TOK_PUNCTUATION(NAME, SPELLING) NAME,`
			`#define TOK_KEYWORD(SPELLING) kw_##SPELLING,`
			`#include "TokenKinds.def"`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`};`

Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
			`// Return the bytes that make up this token.`
			`StringRef getSpelling() const { return spelling; }`

			`// Token classification.`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`Kind getKind() const { return kind; }`
			`bool is(Kind K) const { return kind == K; }`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`bool isAny(Kind k1, Kind k2) const { return is(k1) \|\| is(k2); }`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
			`/// Return true if this token is one of the specified kinds.`
Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`template <typename... T>`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`if (is(k1))`
			`return true;`
			`return isAny(k2, k3, others...);`
			`}`

Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`bool isNot(Kind k) const { return kind != k; }`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
			`/// Return true if this token isn't one of the specified kinds.`
Fix a bug in the .mlir lexer, where a \0 character in a file is treated as a colon (due to an accidental fall through) instead of whitespace. Summary: While here, simplify the lexer a bit by eliminating the unneeded 'operator' classification of certain sigils, they can just be treated as 'punctuation'. Reviewers: rriddle! Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D76647 2020-03-24 06:39:32 +08:00			`template <typename... T>`
			`bool isNot(Kind k1, Kind k2, T... others) const {`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`return !isAny(k1, k2, others...);`
			`}`

Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00			`/// Return true if this is one of the keyword token kinds (e.g. kw_if).`
			`bool isKeyword() const;`

Implement parser and lexer support for most of the type grammar. Semi-affine maps and address spaces are not yet supported (someone want to take this on?). We also don't generate IR objects for types yet, which I plan to tackle next. PiperOrigin-RevId: 201754283 2018-06-23 06:52:02 +08:00			`// Helpers to decode specific sorts of tokens.`

			`/// For an integer token, return its value as an unsigned. If it doesn't fit,`
			`/// return None.`
Sketch out parser/IR support for OperationInst, and a new Instruction base class. Introduce an Identifier class to MLIRContext to represent uniqued identifiers, introduce string literal support to the lexer, introducing parser and printer support etc. PiperOrigin-RevId: 202592007 2018-06-29 11:45:33 +08:00			`Optional<unsigned> getUnsignedIntegerValue() const;`

Parse ML function arguments, return statement operands, and for statement loop header. Loop bounds are presumed to be constants for now and are stored in ForStmt as affine constant expressions. ML function arguments, return statement operands and loop variable name are dropped for now. PiperOrigin-RevId: 205256208 2018-07-20 00:52:39 +08:00			`/// For an integer token, return its value as an uint64_t. If it doesn't fit,`
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00			`/// return None.`
Fix the MLIR integer attribute parser to be correct in the face of large integer attributes, it was previously artificially limited to 64 bits. Reviewers: rriddle! Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, grosul1, frgossen, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78065 2020-04-14 07:37:00 +08:00			`static Optional<uint64_t> getUInt64IntegerValue(StringRef spelling);`
			`Optional<uint64_t> getUInt64IntegerValue() const {`
			`return getUInt64IntegerValue(getSpelling());`
			`}`
Add parsing for attributes and attibutes on operations. Add IR representation for attributes on operations. Split Operation out from OperationInst so it can be shared with OperationStmt one day. PiperOrigin-RevId: 203325366 2018-07-05 11:45:39 +08:00
Add parsing for floating point attributes. This is doing it in a suboptimal manner by recombining [integer period literal] into a string literal and parsing that via to_float. PiperOrigin-RevId: 206855106 2018-08-01 08:15:15 +08:00			`/// For a floatliteral token, return its value as a double. Returns None in`
			`/// the case of underflow or overflow.`
			`Optional<double> getFloatingPointValue() const;`

Enhance the type system to support arbitrary precision integers, which are important for low-bitwidth inference cases and hardware synthesis targets. Rename 'int' to 'affineint' to avoid confusion between "the integers" and "the int type". PiperOrigin-RevId: 202751508 2018-06-30 13:08:05 +08:00			`/// For an inttype token, return its bitwidth.`
			`Optional<unsigned> getIntTypeBitwidth() const;`

[mlir] Add a signedness semantics bit to IntegerType Thus far IntegerType has been signless: a value of IntegerType does not have a sign intrinsically and it's up to the specific operation to decide how to interpret those bits. For example, std.addi does two's complement arithmetic, and std.divis/std.diviu treat the first bit as a sign. This design choice was made some time ago when we didn't have lots of dialects and dialects were more rigid. Today we have much more extensible infrastructure and different dialects may want different modelling over integer signedness. So while we can say we want signless integers in the standard dialect, we cannot dictate for others. Requiring each dialect to model the signedness semantics with another set of custom types is duplicating the functionality everywhere, considering the fundamental role integer types play. This CL extends the IntegerType with a signedness semantics bit. This gives each dialect an option to opt in signedness semantics if that's what they want and helps code sharing. The parser is modified to recognize `si[1-9][0-9]*` and `ui[1-9][0-9]*` as signed and unsigned integer types, respectively, leaving the original `i[1-9][0-9]*` to continue to mean no indication over signedness semantics. All existing dialects are not affected (yet) as this is a feature to opt in. More discussions can be found at: https://groups.google.com/a/tensorflow.org/d/msg/mlir/XmkV8HOPWpo/7O4X0Nb_AQAJ Differential Revision: https://reviews.llvm.org/D72533 2020-01-11 03:48:24 +08:00			`/// For an inttype token, return its signedness semantics: llvm::None means no`
			`/// signedness semantics; true means signed integer type; false means unsigned`
			`/// integer type.`
			`Optional<bool> getIntTypeSignedness() const;`

Add support for multiple results to the printer/parser, add support for forward references to the parser, add initial support for SSA use-list iteration and RAUW. PiperOrigin-RevId: 205484031 2018-07-21 09:41:34 +08:00			`/// Given a hash_identifier token like #123, try to parse the number out of`
			`/// the identifier, returning None if it is a named identifier like #x or`
			`/// if the integer doesn't fit.`
			`Optional<unsigned> getHashIdentifierNumber() const;`

Add support for parsing/printing non bare-identifier SymbolRefs. The restriction that symbols can only have identifier names is arbitrary, and artificially limits the names that a symbol may have. This change adds support for parsing and printing symbols that don't fit in the 'bare-identifier' grammar by printing the reference in quotes, e.g. @"0_my_reference" can now be used as a symbol name. PiperOrigin-RevId: 273644768 2019-10-09 08:44:39 +08:00			`/// Given a token containing a string literal, return its value, including`
			`/// removing the quote characters and unescaping the contents of the string.`
Sketch out parser/IR support for OperationInst, and a new Instruction base class. Introduce an Identifier class to MLIRContext to represent uniqued identifiers, introduce string literal support to the lexer, introducing parser and printer support etc. PiperOrigin-RevId: 202592007 2018-06-29 11:45:33 +08:00			`std::string getStringValue() const;`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
[mlir] Optimize the parsing of ElementsAttr hex strings This revision optimizes the parsing of hex strings by using the checked variant of llvm::fromHex, and adding a specialized method to Token for extracting hex strings. This leads to a large decrease in compile time when parsing large hex constants (one example: 2.6 seconds -> 370 milliseconds) Differential Revision: https://reviews.llvm.org/D90266 2020-10-29 07:46:38 +08:00			`/// Given a token containing a hex string literal, return its value or None if`
			`/// the token does not contain a valid hex string. A hex string literal is a`
			/// string starting with `0x` and only containing hex digits.
			`Optional<std::string> getHexStringValue() const;`

[mlir][NFC] Split Parser into several different files. Summary: At this point Parser has grown to be over 5000 lines and can be very difficult to navigate/update/etc. This commit splits Parser.cpp into several sub files focused on parsing specific types of entities; e.g., Attributes, Types, etc. Differential Revision: https://reviews.llvm.org/D81299 2020-06-11 07:58:55 +08:00			`/// Given a token containing a symbol reference, return the unescaped string`
			`/// value.`
			`std::string getSymbolReference() const;`

Implement parser and lexer support for most of the type grammar. Semi-affine maps and address spaces are not yet supported (someone want to take this on?). We also don't generate IR objects for types yet, which I plan to tackle next. PiperOrigin-RevId: 201754283 2018-06-23 06:52:02 +08:00			`// Location processing.`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`llvm::SMLoc getLoc() const;`
			`llvm::SMLoc getEndLoc() const;`
			`llvm::SMRange getLocRange() const;`

Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`/// Given a punctuation or keyword token kind, return the spelling of the`
			`/// token as a string. Warning: This will abort on markers, identifiers and`
			`/// literal tokens since they have no fixed spelling.`
			`static StringRef getTokenSpelling(Kind kind);`

Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00			`private:`
			`/// Discriminator that indicates the sort of token this is.`
Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fixes a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ). PiperOrigin-RevId: 202676858 2018-06-30 02:15:56 +08:00			`Kind kind;`
Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 2018-06-23 01:39:19 +08:00
			`/// A reference to the entire token contents; this is always a pointer into`
			`/// a memory buffer owned by the source manager.`
			`StringRef spelling;`
			`};`

			`} // end namespace mlir`

Update style/clang-format (NFC). Update to be consistent & so that future save + clang-format workflows don't introduce extra changes. PiperOrigin-RevId: 259361174 2019-07-23 01:51:40 +08:00			`#endif // MLIR_LIB_PARSER_TOKEN_H`