forked from OSchip/llvm-project
226 lines
7.5 KiB
C++
226 lines
7.5 KiB
C++
//===- FormatGen.cpp - Utilities for custom assembly formats ----*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "FormatGen.h"
|
|
#include "llvm/ADT/StringSwitch.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
#include "llvm/TableGen/Error.h"
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::tblgen;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// FormatToken
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Return the source location of this token, built from the address of the
/// first character of its spelling.
llvm::SMLoc FormatToken::getLoc() const {
  const char *spellingStart = spelling.data();
  return llvm::SMLoc::getFromPointer(spellingStart);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// FormatLexer
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Construct a lexer over the main file buffer of `mgr`.
///
/// \param mgr source manager whose main-file buffer is lexed and through
///            which errors are reported.
/// \param loc location stored for later use when emitting the "in custom
///            assembly format" note.
FormatLexer::FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc)
    : mgr(mgr), loc(loc),
      // The text to lex is the contents of the source manager's main file.
      curBuffer(mgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer()),
      // Lexing starts at the first character of that buffer.
      curPtr(curBuffer.data()) {}
|
|
|
|
/// Report `msg` as an error at `loc`, attach a note at the location this lexer
/// was constructed with, and return an error token anchored at `loc`.
///
/// Note that the parameter `loc` shadows the member of the same name; the
/// member is accessed explicitly through `this->loc`.
FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) {
  // The error itself is printed through this lexer's source manager.
  mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);

  // The follow-up note is printed through llvm::SrcMgr at the stored location.
  llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note,
                            "in custom assembly format for this operation");

  const char *errorStart = loc.getPointer();
  return formToken(FormatToken::error, errorStart);
}
|
|
|
|
/// Convenience overload: report `msg` as an error at the raw buffer pointer
/// `loc` by converting it to an SMLoc and forwarding to the SMLoc overload.
FormatToken FormatLexer::emitError(const char *loc, const Twine &msg) {
  const llvm::SMLoc errorLoc = llvm::SMLoc::getFromPointer(loc);
  return emitError(errorLoc, msg);
}
|
|
|
|
/// Report `msg` as an error at `loc`, followed by two notes: the standard
/// "in custom assembly format" note at the location this lexer was constructed
/// with, and the caller-supplied `note` at `loc`. Returns an error token
/// anchored at `loc`.
///
/// \param loc  location of the error (shadows the member `loc`).
/// \param msg  the error message.
/// \param note additional note text printed at `loc` after the error.
FormatToken FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
                                          const Twine &note) {
  // The error is printed through this lexer's source manager.
  mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);
  // The context note is printed through llvm::SrcMgr at the stored location.
  llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note,
                            "in custom assembly format for this operation");
  // The caller's note is attached to the same location as the error.
  mgr.PrintMessage(loc, llvm::SourceMgr::DK_Note, note);
  return formToken(FormatToken::error, loc.getPointer());
}
|
|
|
|
int FormatLexer::getNextChar() {
|
|
char curChar = *curPtr++;
|
|
switch (curChar) {
|
|
default:
|
|
return (unsigned char)curChar;
|
|
case 0: {
|
|
// A nul character in the stream is either the end of the current buffer or
|
|
// a random nul in the file. Disambiguate that here.
|
|
if (curPtr - 1 != curBuffer.end())
|
|
return 0;
|
|
|
|
// Otherwise, return end of file.
|
|
--curPtr;
|
|
return EOF;
|
|
}
|
|
case '\n':
|
|
case '\r':
|
|
// Handle the newline character by ignoring it and incrementing the line
|
|
// count. However, be careful about 'dos style' files with \n\r in them.
|
|
// Only treat a \n\r or \r\n as a single line.
|
|
if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
|
|
++curPtr;
|
|
return '\n';
|
|
}
|
|
}
|
|
|
|
/// Lex and return the next token from the buffer.
///
/// Whitespace (space, tab, newline) and stray nul characters are skipped.
/// Single-character punctuation is turned directly into a token; '`' starts a
/// literal, '$' starts a variable, and a letter or '_' starts an identifier or
/// keyword. Any other character yields an error token.
FormatToken FormatLexer::lexToken() {
  while (true) {
    // Each attempt records where the candidate token begins.
    const char *tokStart = curPtr;
    const int curChar = getNextChar();

    switch (curChar) {
    // Whitespace and embedded nuls are ignored: restart at the next character.
    case 0:
    case ' ':
    case '\t':
    case '\n':
      continue;

    // End of input.
    case EOF:
      return formToken(FormatToken::eof, tokStart);

    // Tokens introduced by a sigil are finished by dedicated helpers.
    case '`':
      return lexLiteral(tokStart);
    case '$':
      return lexVariable(tokStart);

    // Single-character punctuation.
    case '^':
      return formToken(FormatToken::caret, tokStart);
    case ':':
      return formToken(FormatToken::colon, tokStart);
    case ',':
      return formToken(FormatToken::comma, tokStart);
    case '=':
      return formToken(FormatToken::equal, tokStart);
    case '<':
      return formToken(FormatToken::less, tokStart);
    case '>':
      return formToken(FormatToken::greater, tokStart);
    case '?':
      return formToken(FormatToken::question, tokStart);
    case '(':
      return formToken(FormatToken::l_paren, tokStart);
    case ')':
      return formToken(FormatToken::r_paren, tokStart);
    case '*':
      return formToken(FormatToken::star, tokStart);

    default:
      break;
    }

    // Identifiers and keywords start with [a-zA-Z_].
    if (isalpha(curChar) || curChar == '_')
      return lexIdentifier(tokStart);

    // Nothing matched: report the offending character.
    return emitError(tokStart, "unexpected character");
  }
}
|
|
|
|
/// Lex the remainder of a literal delimited by backticks. The opening '`' has
/// already been consumed and `tokStart` points at it.
///
/// Returns a literal token once the closing '`' is consumed, or an error token
/// if a nul character is reached first. In the error case the nul has already
/// been consumed, so the error is reported at `curPtr - 1`.
FormatToken FormatLexer::lexLiteral(const char *tokStart) {
  assert(curPtr[-1] == '`');

  // Consume characters until the closing backtick or a nul is seen.
  for (;;) {
    const char ch = *curPtr++;
    if (ch == '\0')
      break;
    if (ch == '`')
      return formToken(FormatToken::literal, tokStart);
  }

  return emitError(curPtr - 1, "unexpected end of file in literal");
}
|
|
|
|
/// Lex the remainder of a variable of the form `$[a-zA-Z_][a-zA-Z0-9_]*`. The
/// leading '$' has already been consumed and `tokStart` points at it.
///
/// Returns a variable token, or an error token (reported at the '$') if the
/// character following '$' cannot start a name.
FormatToken FormatLexer::lexVariable(const char *tokStart) {
  // The <cctype> classifiers have undefined behavior for negative arguments,
  // so plain `char` values are converted through `unsigned char` first.
  const auto first = static_cast<unsigned char>(curPtr[0]);
  if (!isalpha(first) && first != '_')
    return emitError(curPtr - 1, "expected variable name");

  // Otherwise, consume the rest of the characters.
  while (isalnum(static_cast<unsigned char>(*curPtr)) || *curPtr == '_')
    ++curPtr;
  return formToken(FormatToken::variable, tokStart);
}
|
|
|
|
/// Lex the remainder of an identifier whose first character has already been
/// consumed (`tokStart` points at it), then classify it as one of the format
/// keywords or as a plain identifier.
FormatToken FormatLexer::lexIdentifier(const char *tokStart) {
  // Match the rest of the identifier regex: [0-9a-zA-Z_\-]*
  // The <cctype> classifiers have undefined behavior for negative arguments,
  // so the plain `char` is converted through `unsigned char` first.
  while (isalnum(static_cast<unsigned char>(*curPtr)) || *curPtr == '_' ||
         *curPtr == '-')
    ++curPtr;

  // Check to see if this identifier is a keyword.
  StringRef str(tokStart, curPtr - tokStart);
  auto kind =
      StringSwitch<FormatToken::Kind>(str)
          .Case("attr-dict", FormatToken::kw_attr_dict)
          .Case("attr-dict-with-keyword", FormatToken::kw_attr_dict_w_keyword)
          .Case("custom", FormatToken::kw_custom)
          .Case("functional-type", FormatToken::kw_functional_type)
          .Case("operands", FormatToken::kw_operands)
          .Case("params", FormatToken::kw_params)
          .Case("ref", FormatToken::kw_ref)
          .Case("regions", FormatToken::kw_regions)
          .Case("results", FormatToken::kw_results)
          .Case("struct", FormatToken::kw_struct)
          .Case("successors", FormatToken::kw_successors)
          .Case("type", FormatToken::kw_type)
          .Default(FormatToken::identifier);
  return FormatToken(kind, str);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Utility Functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Decide whether a space should be emitted before the literal `value`.
///
/// Anything that is neither a single character nor "->" always gets a leading
/// space. Otherwise the first character of `value` is checked against a set of
/// punctuation that suppresses the space; that set is smaller when the
/// previously emitted element was itself punctuation.
///
/// \param value              the literal about to be emitted.
/// \param lastWasPunctuation whether the previous element was punctuation.
bool mlir::tblgen::shouldEmitSpaceBefore(StringRef value,
                                         bool lastWasPunctuation) {
  const bool isSingleChar = value.size() == 1;
  if (!isSingleChar && value != "->")
    return true;

  // Characters that must not be preceded by a space in each situation.
  const StringRef noSpaceBefore = lastWasPunctuation ? ">)}]," : "<>(){}[],";
  return !noSpaceBefore.contains(value.front());
}
|
|
|
|
/// Return true if `value` has the shape of a keyword: a letter or '_' followed
/// by any number of letters, digits, '_', '$' or '.'.
///
/// An empty string is not a keyword and yields false.
bool mlir::tblgen::canFormatStringAsKeyword(StringRef value) {
  // Guard against an empty string before inspecting the first character.
  if (value.empty())
    return false;

  // The <cctype> classifiers have undefined behavior for negative arguments,
  // so plain `char` values are converted through `unsigned char` first.
  const auto first = static_cast<unsigned char>(value.front());
  if (!isalpha(first) && first != '_')
    return false;

  return llvm::all_of(value.drop_front(), [](char c) {
    return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
           c == '.';
  });
}
|
|
|
|
/// Return true if `value` is acceptable as a literal in a format string: a
/// single letter or one of the punctuation characters `_:,=<>()[]{}?+*`, the
/// arrow "->", or anything accepted by canFormatStringAsKeyword. An empty
/// string is never valid.
bool mlir::tblgen::isValidLiteral(StringRef value) {
  if (value.empty())
    return false;
  char front = value.front();

  // If there is only one character, this must either be punctuation or a
  // single character bare identifier.
  // `isalpha` has undefined behavior for negative arguments, so the plain
  // `char` is converted through `unsigned char` first.
  if (value.size() == 1)
    return isalpha(static_cast<unsigned char>(front)) ||
           StringRef("_:,=<>()[]{}?+*").contains(front);

  // Check the punctuation that are larger than a single character.
  if (value == "->")
    return true;

  // Otherwise, this must be an identifier.
  return canFormatStringAsKeyword(value);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Commandline Options
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Boolean command-line option registered under the name
/// "asmformat-error-is-fatal"; its initial value is true.
llvm::cl::opt<bool> mlir::tblgen::formatErrorIsFatal(
    "asmformat-error-is-fatal", llvm::cl::init(true),
    llvm::cl::desc("Emit a fatal error if format parsing fails"));
|