forked from OSchip/llvm-project
382 lines
15 KiB
C++
382 lines
15 KiB
C++
//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class represents the Lexer for tablegen files.
//
//===----------------------------------------------------------------------===//
|
|
#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
|
|
#define LLVM_LIB_TABLEGEN_TGLEXER_H
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/ADT/StringSet.h"
|
|
#include "llvm/Support/DataTypes.h"
|
|
#include "llvm/Support/SMLoc.h"
|
|
#include <cassert>
|
|
#include <memory>
|
|
#include <set>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
namespace llvm {
|
|
// Forward declarations: the lexer interface below only names these types
// through references or in function signatures, so their full definitions
// are not required in this header.
template <typename T> class ArrayRef;
class SourceMgr;
class Twine;
|
|
|
|
namespace tgtok {
/// Kinds of token handled by the TableGen lexer.
///
/// The enumerators are grouped by the amount of extra information the lexer
/// keeps for them (none, an integer, a string). The last group is reserved
/// for the lexer's own preprocessing logic and is never handed to callers.
enum TokKind {
  // Markers
  Eof, Error,

  // Tokens with no info.
  minus, plus,        // - +
  l_square, r_square, // [ ]
  l_brace, r_brace,   // { }
  l_paren, r_paren,   // ( )
  less, greater,      // < >
  colon, semi,        // : ;
  comma, dot,         // , .
  equal, question,    // = ?
  paste,              // #
  dotdotdot,          // ...

  // Reserved keywords. ('ElseKW' is named to distinguish it from the
  // existing 'Else' that means the preprocessor #else.)
  Assert, Bit, Bits, Class, Code, Dag, Def, Defm, Defset, Defvar, ElseKW,
  FalseKW, Field, Foreach, If, In, Include, Int, Let, List, MultiClass,
  String, Then, TrueKW,

  // Bang operators.
  XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL,
  XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XFind, XCast,
  XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
  XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,

  // Boolean literals.
  TrueVal, FalseVal,

  // Integer value.
  IntVal,

  // Binary constant.  Note that these are sized according to the number of
  // bits given.
  BinaryIntVal,

  // String valued tokens.
  Id, StrVal, VarName, CodeFragment,

  // Preprocessing tokens for internal usage by the lexer.
  // They are never returned as a result of Lex().
  Ifdef, Ifndef, Else, Endif, Define
};
} // end namespace tgtok
|
|
|
|
/// TGLexer - TableGen Lexer class.
|
|
class TGLexer {
|
|
SourceMgr &SrcMgr;
|
|
|
|
const char *CurPtr = nullptr;
|
|
StringRef CurBuf;
|
|
|
|
// Information about the current token.
|
|
const char *TokStart = nullptr;
|
|
tgtok::TokKind CurCode = tgtok::TokKind::Eof;
|
|
std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
|
|
int64_t CurIntVal = 0; // This is valid for IntVal.
|
|
|
|
/// CurBuffer - This is the current buffer index we're lexing from as managed
|
|
/// by the SourceMgr object.
|
|
unsigned CurBuffer = 0;
|
|
|
|
public:
|
|
typedef std::set<std::string> DependenciesSetTy;
|
|
|
|
private:
|
|
/// Dependencies - This is the list of all included files.
|
|
DependenciesSetTy Dependencies;
|
|
|
|
public:
|
|
TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
|
|
|
|
tgtok::TokKind Lex() {
|
|
return CurCode = LexToken(CurPtr == CurBuf.begin());
|
|
}
|
|
|
|
const DependenciesSetTy &getDependencies() const {
|
|
return Dependencies;
|
|
}
|
|
|
|
tgtok::TokKind getCode() const { return CurCode; }
|
|
|
|
const std::string &getCurStrVal() const {
|
|
assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
|
|
CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
|
|
"This token doesn't have a string value");
|
|
return CurStrVal;
|
|
}
|
|
int64_t getCurIntVal() const {
|
|
assert(CurCode == tgtok::IntVal && "This token isn't an integer");
|
|
return CurIntVal;
|
|
}
|
|
std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
|
|
assert(CurCode == tgtok::BinaryIntVal &&
|
|
"This token isn't a binary integer");
|
|
return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
|
|
}
|
|
|
|
SMLoc getLoc() const;
|
|
|
|
private:
|
|
/// LexToken - Read the next token and return its code.
|
|
tgtok::TokKind LexToken(bool FileOrLineStart = false);
|
|
|
|
tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
|
|
tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
|
|
|
|
int getNextChar();
|
|
int peekNextChar(int Index) const;
|
|
void SkipBCPLComment();
|
|
bool SkipCComment();
|
|
tgtok::TokKind LexIdentifier();
|
|
bool LexInclude();
|
|
tgtok::TokKind LexString();
|
|
tgtok::TokKind LexVarName();
|
|
tgtok::TokKind LexNumber();
|
|
tgtok::TokKind LexBracket();
|
|
tgtok::TokKind LexExclaim();
|
|
|
|
// Process EOF encountered in LexToken().
|
|
// If EOF is met in an include file, then the method will update
|
|
// CurPtr, CurBuf and preprocessing include stack, and return true.
|
|
// If EOF is met in the top-level file, then the method will
|
|
// update and check the preprocessing include stack, and return false.
|
|
bool processEOF();
|
|
|
|
// *** Structures and methods for preprocessing support ***
|
|
|
|
// A set of macro names that are defined either via command line or
|
|
// by using:
|
|
// #define NAME
|
|
StringSet<> DefinedMacros;
|
|
|
|
// Each of #ifdef and #else directives has a descriptor associated
|
|
// with it.
|
|
//
|
|
// An ordered list of preprocessing controls defined by #ifdef/#else
|
|
// directives that are in effect currently is called preprocessing
|
|
// control stack. It is represented as a vector of PreprocessorControlDesc's.
|
|
//
|
|
// The control stack is updated according to the following rules:
|
|
//
|
|
// For each #ifdef we add an element to the control stack.
|
|
// For each #else we replace the top element with a descriptor
|
|
// with an inverted IsDefined value.
|
|
// For each #endif we pop the top element from the control stack.
|
|
//
|
|
// When CurPtr reaches the current buffer's end, the control stack
|
|
// must be empty, i.e. #ifdef and the corresponding #endif
|
|
// must be located in the same file.
|
|
struct PreprocessorControlDesc {
|
|
// Either tgtok::Ifdef or tgtok::Else.
|
|
tgtok::TokKind Kind;
|
|
|
|
// True, if the condition for this directive is true, false - otherwise.
|
|
// Examples:
|
|
// #ifdef NAME : true, if NAME is defined, false - otherwise.
|
|
// ...
|
|
// #else : false, if NAME is defined, true - otherwise.
|
|
bool IsDefined;
|
|
|
|
// Pointer into CurBuf to the beginning of the preprocessing directive
|
|
// word, e.g.:
|
|
// #ifdef NAME
|
|
// ^ - SrcPos
|
|
SMLoc SrcPos;
|
|
};
|
|
|
|
// We want to disallow code like this:
|
|
// file1.td:
|
|
// #define NAME
|
|
// #ifdef NAME
|
|
// include "file2.td"
|
|
// EOF
|
|
// file2.td:
|
|
// #endif
|
|
// EOF
|
|
//
|
|
// To do this, we clear the preprocessing control stack on entry
|
|
// to each of the included file. PrepIncludeStack is used to store
|
|
// preprocessing control stacks for the current file and all its
|
|
// parent files. The back() element is the preprocessing control
|
|
// stack for the current file.
|
|
std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
|
|
PrepIncludeStack;
|
|
|
|
// Validate that the current preprocessing control stack is empty,
|
|
// since we are about to exit a file, and pop the include stack.
|
|
//
|
|
// If IncludeStackMustBeEmpty is true, the include stack must be empty
|
|
// after the popping, otherwise, the include stack must not be empty
|
|
// after the popping. Basically, the include stack must be empty
|
|
// only if we exit the "top-level" file (i.e. finish lexing).
|
|
//
|
|
// The method returns false, if the current preprocessing control stack
|
|
// is not empty (e.g. there is an unterminated #ifdef/#else),
|
|
// true - otherwise.
|
|
bool prepExitInclude(bool IncludeStackMustBeEmpty);
|
|
|
|
// Look ahead for a preprocessing directive starting from CurPtr. The caller
|
|
// must only call this method, if *(CurPtr - 1) is '#'. If the method matches
|
|
// a preprocessing directive word followed by a whitespace, then it returns
|
|
// one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
|
|
//
|
|
// CurPtr is not adjusted by this method.
|
|
tgtok::TokKind prepIsDirective() const;
|
|
|
|
// Given a preprocessing token kind, adjusts CurPtr to the end
|
|
// of the preprocessing directive word. Returns true, unless
|
|
// an unsupported token kind is passed in.
|
|
//
|
|
// We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
|
|
// to avoid adjusting CurPtr before we are sure that '#' is followed
|
|
// by a preprocessing directive. If it is not, then we fall back to
|
|
// tgtok::paste interpretation of '#'.
|
|
bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
|
|
|
|
// The main "exit" point from the token parsing to preprocessor.
|
|
//
|
|
// The method is called for CurPtr, when prepIsDirective() returns
|
|
// true. The first parameter matches the result of prepIsDirective(),
|
|
// denoting the actual preprocessor directive to be processed.
|
|
//
|
|
// If the preprocessing directive disables the tokens processing, e.g.:
|
|
// #ifdef NAME // NAME is undefined
|
|
// then lexPreprocessor() enters the lines-skipping mode.
|
|
// In this mode, it does not parse any tokens, because the code under
|
|
// the #ifdef may not even be a correct tablegen code. The preprocessor
|
|
// looks for lines containing other preprocessing directives, which
|
|
// may be prepended with whitespaces and C-style comments. If the line
|
|
// does not contain a preprocessing directive, it is skipped completely.
|
|
// Otherwise, the preprocessing directive is processed by recursively
|
|
// calling lexPreprocessor(). The processing of the encountered
|
|
// preprocessing directives includes updating preprocessing control stack
|
|
// and adding new macros into DefinedMacros set.
|
|
//
|
|
// The second parameter controls whether lexPreprocessor() is called from
|
|
// LexToken() (true) or recursively from lexPreprocessor() (false).
|
|
//
|
|
// If ReturnNextLiveToken is true, the method returns the next
|
|
// LEX token following the current directive or following the end
|
|
// of the disabled preprocessing region corresponding to this directive.
|
|
// If ReturnNextLiveToken is false, the method returns the first parameter,
|
|
// unless there were errors encountered in the disabled preprocessing
|
|
// region - in this case, it returns tgtok::Error.
|
|
tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
|
|
bool ReturnNextLiveToken = true);
|
|
|
|
// Worker method for lexPreprocessor() to skip lines after some
|
|
// preprocessing directive up to the buffer end or to the directive
|
|
// that re-enables token processing. The method returns true
|
|
// upon processing the next directive that re-enables tokens
|
|
// processing. False is returned if an error was encountered.
|
|
//
|
|
// Note that prepSkipRegion() calls lexPreprocessor() to process
|
|
// encountered preprocessing directives. In this case, the second
|
|
// parameter to lexPreprocessor() is set to false. Being passed
|
|
// false ReturnNextLiveToken, lexPreprocessor() must never call
|
|
// prepSkipRegion(). We assert this by passing ReturnNextLiveToken
|
|
// to prepSkipRegion() and checking that it is never set to false.
|
|
bool prepSkipRegion(bool MustNeverBeFalse);
|
|
|
|
// Lex name of the macro after either #ifdef or #define. We could have used
|
|
// LexIdentifier(), but it has special handling of "include" word, which
|
|
// could result in awkward diagnostic errors. Consider:
|
|
// ----
|
|
// #ifdef include
|
|
// class ...
|
|
// ----
|
|
// LexIdentifier() will engage LexInclude(), which will complain about
|
|
// missing file with name "class". Instead, prepLexMacroName() will treat
|
|
// "include" as a normal macro name.
|
|
//
|
|
// On entry, CurPtr points to the end of a preprocessing directive word.
|
|
// The method allows for whitespaces between the preprocessing directive
|
|
// and the macro name. The allowed whitespaces are ' ' and '\t'.
|
|
//
|
|
// If the first non-whitespace symbol after the preprocessing directive
|
|
// is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
|
|
// the method updates TokStart to the position of the first non-whitespace
|
|
// symbol, sets CurPtr to the position of the macro name's last symbol,
|
|
// and returns a string reference to the macro name. Otherwise,
|
|
// TokStart is set to the first non-whitespace symbol after the preprocessing
|
|
// directive, and the method returns an empty string reference.
|
|
//
|
|
// In all cases, TokStart may be used to point to the word following
|
|
// the preprocessing directive.
|
|
StringRef prepLexMacroName();
|
|
|
|
// Skip any whitespaces starting from CurPtr. The method is used
|
|
// only in the lines-skipping mode to find the first non-whitespace
|
|
// symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
|
|
// and '\r'. The method skips C-style comments as well, because
|
|
// it is used to find the beginning of the preprocessing directive.
|
|
// If we do not handle C-style comments the following code would
|
|
// result in incorrect detection of a preprocessing directive:
|
|
// /*
|
|
// #ifdef NAME
|
|
// */
|
|
// As long as we skip C-style comments, the following code is correctly
|
|
// recognized as a preprocessing directive:
|
|
// /* first line comment
|
|
// second line comment */ #ifdef NAME
|
|
//
|
|
// The method returns true upon reaching the first non-whitespace symbol
|
|
// or EOF, CurPtr is set to point to this symbol. The method returns false,
|
|
// if an error occurred during skipping of a C-style comment.
|
|
bool prepSkipLineBegin();
|
|
|
|
// Skip any whitespaces or comments after a preprocessing directive.
|
|
// The method returns true upon reaching either end of the line
|
|
// or end of the file. If there is a multiline C-style comment
|
|
// after the preprocessing directive, the method skips
|
|
// the comment, so the final CurPtr may point to one of the next lines.
|
|
// The method returns false, if an error occurred during skipping
|
|
// C- or C++-style comment, or a non-whitespace symbol appears
|
|
// after the preprocessing directive.
|
|
//
|
|
// The method maybe called both during lines-skipping and tokens
|
|
// processing. It actually verifies that only whitespaces or/and
|
|
// comments follow a preprocessing directive.
|
|
//
|
|
// After the execution of this mehod, CurPtr points either to new line
|
|
// symbol, buffer end or non-whitespace symbol following the preprocesing
|
|
// directive.
|
|
bool prepSkipDirectiveEnd();
|
|
|
|
// Skip all symbols to the end of the line/file.
|
|
// The method adjusts CurPtr, so that it points to either new line
|
|
// symbol in the current line or the buffer end.
|
|
void prepSkipToLineEnd();
|
|
|
|
// Return true, if the current preprocessor control stack is such that
|
|
// we should allow lexer to process the next token, false - otherwise.
|
|
//
|
|
// In particular, the method returns true, if all the #ifdef/#else
|
|
// controls on the stack have their IsDefined member set to true.
|
|
bool prepIsProcessingEnabled();
|
|
|
|
// Report an error, if we reach EOF with non-empty preprocessing control
|
|
// stack. This means there is no matching #endif for the previous
|
|
// #ifdef/#else.
|
|
void prepReportPreprocessorStackError();
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif
|