forked from OSchip/llvm-project
351 lines
7.8 KiB
C++
351 lines
7.8 KiB
C++
//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include <string.h>
|
|
|
|
#include "GoLexer.h"
|
|
|
|
using namespace lldb_private;
|
|
|
|
// Table mapping keyword and operator spellings to their token types. It starts
// out null and is created on first use by LookupKeyword()/LookupToken(), which
// call InitKeywords() when the pointer is still null.
llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords;
|
|
|
|
// Creates a lexer over the NUL-terminated buffer `src`. Only the pointer is
// stored (the text is not copied); the end of input is found with strlen().
// The "last token" starts out as an empty TOK_INVALID token.
GoLexer::GoLexer(const char *src)
    : m_src(src),
      m_end(src + ::strlen(src)),
      m_last_token(TOK_INVALID, "") {}
|
|
|
|
bool GoLexer::SkipWhitespace() {
|
|
bool saw_newline = false;
|
|
for (; m_src < m_end; ++m_src) {
|
|
if (*m_src == '\n')
|
|
saw_newline = true;
|
|
if (*m_src == '/' && !SkipComment())
|
|
return saw_newline;
|
|
else if (!IsWhitespace(*m_src))
|
|
return saw_newline;
|
|
}
|
|
return saw_newline;
|
|
}
|
|
|
|
// Tries to skip a comment that begins at m_src.
//
// On success returns true and leaves m_src on the LAST character that belongs
// to the comment, so that the caller's subsequent ++m_src moves past it:
//   - "// ..." : the character just before the terminating '\n', or the final
//                character of the input when the comment is not followed by a
//                newline;
//   - "/* */"  : the closing '/'.
// Returns false, without touching m_src, when the text at m_src is not a
// comment or when a block comment is never closed.
bool GoLexer::SkipComment() {
  if (m_src[0] != '/')
    return false;

  if (m_src[1] == '/') {
    for (const char *c = m_src + 2; c < m_end; ++c) {
      if (*c == '\n') {
        m_src = c - 1;
        return true;
      }
    }
    // The comment runs to the end of the input. Consume all of it; leaving
    // m_src where it was would make the comment text get lexed as tokens.
    m_src = m_end - 1;
    return true;
  }

  if (m_src[1] == '*') {
    for (const char *c = m_src + 2; c < m_end; ++c) {
      // c[1] is at worst the NUL terminator that m_end points at.
      if (c[0] == '*' && c[1] == '/') {
        m_src = c + 1;
        return true;
      }
    }
  }
  return false;
}
|
|
|
|
// Produces the next token. Leading whitespace/comments are skipped first; the
// token's text is the span of input consumed by InternalLex(). The returned
// reference points at m_last_token, which is overwritten by the next call.
const GoLexer::Token &GoLexer::Lex() {
  const bool crossed_newline = SkipWhitespace();
  const char *const token_begin = m_src;

  // InternalLex() inspects the previous token type, so it has to run before
  // m_last_token is updated.
  const TokenType type = InternalLex(crossed_newline);
  m_last_token.m_type = type;
  m_last_token.m_value = llvm::StringRef(token_begin, m_src - token_begin);
  return m_last_token;
}
|
|
|
|
// Classifies and consumes the token starting at m_src.
//
// `newline` says whether a line break was skipped since the previous token.
// If so, and the previous token is one of the kinds listed below, an
// OP_SEMICOLON is returned without consuming any input (implicit semicolon).
// Otherwise dispatches on the first character to the matching Do*() helper.
// A character that starts no known token is consumed and reported as
// TOK_INVALID. Returns TOK_EOF once the input is exhausted.
GoLexer::TokenType GoLexer::InternalLex(bool newline) {
  if (m_src >= m_end)
    return TOK_EOF;

  if (newline) {
    switch (m_last_token.m_type) {
    case TOK_IDENTIFIER:
    case LIT_FLOAT: case LIT_IMAGINARY: case LIT_INTEGER:
    case LIT_RUNE: case LIT_STRING:
    case KEYWORD_BREAK: case KEYWORD_CONTINUE:
    case KEYWORD_FALLTHROUGH: case KEYWORD_RETURN:
    case OP_PLUS_PLUS: case OP_MINUS_MINUS:
    case OP_RPAREN: case OP_RBRACK: case OP_RBRACE:
      return OP_SEMICOLON;
    default:
      break;
    }
  }

  const char first = *m_src;
  switch (first) {
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return DoNumber();

  case '+': case '-': case '*': case '/': case '%':
  case '&': case '|': case '^': case '<': case '>':
  case '!': case ':': case ';': case ',': case '=':
  case '(': case ')': case '[': case ']': case '{': case '}':
    return DoOperator();

  case '.':
    // ".5" is a number; any other '.' is an operator ("." or "...").
    return IsDecimal(m_src[1]) ? DoNumber() : DoOperator();

  case '$':
    // For lldb persistent vars.
    return DoIdent();

  case '"':
  case '`':
    return DoString();

  case '\'':
    return DoRune();

  default:
    if (IsLetterOrDigit(first))
      return DoIdent();
    // Unknown character: swallow it so lexing can make progress.
    ++m_src;
    return TOK_INVALID;
  }
}
|
|
|
|
// Lexes an operator or punctuation token using longest match: a three
// character spelling is tried first, then two characters, then one. The
// matched characters are consumed. If not even the single character is a known
// operator, that one character is still consumed and TOK_INVALID is returned.
GoLexer::TokenType GoLexer::DoOperator() {
  for (int len = 3; len >= 2; --len) {
    // Not enough input left for a spelling of this length.
    if (m_end - m_src < len)
      continue;
    const TokenType op = LookupKeyword(llvm::StringRef(m_src, len));
    if (op != TOK_INVALID) {
      m_src += len;
      return op;
    }
  }

  const TokenType single = LookupKeyword(llvm::StringRef(m_src, 1));
  ++m_src;
  return single;
}
|
|
|
|
// Lexes an identifier or keyword. The first character is accepted
// unconditionally (the caller has already decided it starts an identifier);
// the rest of the token is the following run of letter-or-digit characters.
// Returns the keyword's token type when the text is in the keyword table,
// TOK_IDENTIFIER otherwise.
GoLexer::TokenType GoLexer::DoIdent() {
  const char *const ident_begin = m_src;
  for (++m_src; m_src < m_end && IsLetterOrDigit(*m_src); ++m_src) {
  }

  const TokenType keyword =
      LookupKeyword(llvm::StringRef(ident_begin, m_src - ident_begin));
  return keyword == TOK_INVALID ? TOK_IDENTIFIER : keyword;
}
|
|
|
|
// Lexes a numeric literal starting at m_src and consumes it.
//
//   - "0x"/"0X" followed by hex digits            -> LIT_INTEGER
//   - digits ending in 'i'                        -> LIT_IMAGINARY
//   - digits containing a '.' and/or an exponent  -> LIT_FLOAT
//   - plain decimal digits                        -> LIT_INTEGER
//
// At most one '.' and one exponent are accepted, and no '.' may follow an
// exponent; a second occurrence ends the literal without being consumed.
// Reading relies on the buffer's NUL terminator to stop the digit scans.
GoLexer::TokenType GoLexer::DoNumber() {
  const bool is_hex = m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X');
  if (is_hex) {
    for (m_src += 2; IsHexChar(*m_src); ++m_src) {
    }
    return LIT_INTEGER;
  }

  bool fraction_closed = false; // A '.' or an exponent has been consumed.
  bool exponent_seen = false;   // An exponent has been consumed.
  for (;;) {
    while (IsDecimal(*m_src))
      ++m_src;

    const char next = *m_src;
    if (next == 'i') {
      ++m_src;
      return LIT_IMAGINARY;
    }

    if (next == '.') {
      if (fraction_closed)
        return LIT_FLOAT;
      ++m_src;
      fraction_closed = true;
      continue;
    }

    if (next == 'e' || next == 'E') {
      if (exponent_seen)
        return LIT_FLOAT;
      fraction_closed = true;
      exponent_seen = true;
      ++m_src;
      // Optional sign of the exponent.
      if (*m_src == '+' || *m_src == '-')
        ++m_src;
      continue;
    }

    return fraction_closed ? LIT_FLOAT : LIT_INTEGER;
  }
}
|
|
|
|
// Lexes a rune literal; m_src is on the opening '\'' on entry.
//
// Scans to the closing quote, treating a backslash as escaping the next
// character, and returns LIT_RUNE with m_src just past the closing quote. The
// contents are not validated. Returns TOK_INVALID if a newline or the end of
// input is reached before the closing quote.
GoLexer::TokenType GoLexer::DoRune() {
  while (++m_src < m_end) {
    switch (*m_src) {
    case '\'':
      ++m_src;
      return LIT_RUNE;
    case '\n':
      return TOK_INVALID;
    case '\\':
      if (m_src[1] == '\n')
        return TOK_INVALID;
      // A backslash as the very last input character has nothing to escape.
      // Stop exactly at m_end; skipping the "escaped" character here would
      // push m_src one past m_end and pull the NUL terminator into the token.
      if (m_src + 1 >= m_end) {
        m_src = m_end;
        return TOK_INVALID;
      }
      ++m_src; // Skip the escaped character.
      break;
    default:
      break;
    }
  }
  return TOK_INVALID;
}
|
|
|
|
// Lexes a string literal; m_src is on the opening '"' or '`' on entry.
//
// Raw strings (`...`) run to the next backquote with no escape processing and
// may span lines. Interpreted strings ("...") run to the next unescaped double
// quote and may not contain a newline. Returns LIT_STRING with m_src just past
// the closing delimiter, or TOK_INVALID if the literal is not terminated.
GoLexer::TokenType GoLexer::DoString() {
  if (*m_src == '`') {
    while (++m_src < m_end) {
      if (*m_src == '`') {
        ++m_src;
        return LIT_STRING;
      }
    }
    return TOK_INVALID;
  }

  while (++m_src < m_end) {
    switch (*m_src) {
    case '"':
      ++m_src;
      return LIT_STRING;
    case '\n':
      return TOK_INVALID;
    case '\\':
      if (m_src[1] == '\n')
        return TOK_INVALID;
      // A backslash as the very last input character has nothing to escape.
      // Stop exactly at m_end; skipping the "escaped" character here would
      // push m_src one past m_end and pull the NUL terminator into the token.
      if (m_src + 1 >= m_end) {
        m_src = m_end;
        return TOK_INVALID;
      }
      ++m_src; // Skip the escaped character.
      break;
    default:
      break;
    }
  }
  return TOK_INVALID;
}
|
|
|
|
// Maps a keyword or operator spelling to its token type. Builds the shared
// table on first use. Returns TOK_INVALID when `id` is not in the table.
GoLexer::TokenType GoLexer::LookupKeyword(llvm::StringRef id) {
  if (!m_keywords)
    m_keywords = InitKeywords();

  auto pos = m_keywords->find(id);
  if (pos != m_keywords->end())
    return pos->second;
  return TOK_INVALID;
}
|
|
|
|
// Reverse lookup: returns the spelling registered for token type `t`, found by
// scanning the whole table (built on first use). Returns an empty string when
// no entry has that type.
llvm::StringRef GoLexer::LookupToken(TokenType t) {
  if (!m_keywords)
    m_keywords = InitKeywords();

  for (auto it = m_keywords->begin(), last = m_keywords->end(); it != last;
       ++it) {
    if (it->getValue() == t)
      return it->getKey();
  }
  return "";
}
|
|
|
|
// Allocates and fills the spelling -> token type table containing every
// keyword and every operator/punctuation token. The map is heap-allocated and
// returned as a raw pointer; nothing in this function frees it.
llvm::StringMap<GoLexer::TokenType> *GoLexer::InitKeywords() {
  struct Entry {
    const char *spelling;
    TokenType type;
  };

  static const Entry kEntries[] = {
      // Keywords.
      {"break", KEYWORD_BREAK},
      {"default", KEYWORD_DEFAULT},
      {"func", KEYWORD_FUNC},
      {"interface", KEYWORD_INTERFACE},
      {"select", KEYWORD_SELECT},
      {"case", KEYWORD_CASE},
      {"defer", KEYWORD_DEFER},
      {"go", KEYWORD_GO},
      {"map", KEYWORD_MAP},
      {"struct", KEYWORD_STRUCT},
      {"chan", KEYWORD_CHAN},
      {"else", KEYWORD_ELSE},
      {"goto", KEYWORD_GOTO},
      {"package", KEYWORD_PACKAGE},
      {"switch", KEYWORD_SWITCH},
      {"const", KEYWORD_CONST},
      {"fallthrough", KEYWORD_FALLTHROUGH},
      {"if", KEYWORD_IF},
      {"range", KEYWORD_RANGE},
      {"type", KEYWORD_TYPE},
      {"continue", KEYWORD_CONTINUE},
      {"for", KEYWORD_FOR},
      {"import", KEYWORD_IMPORT},
      {"return", KEYWORD_RETURN},
      {"var", KEYWORD_VAR},
      // Arithmetic and bitwise operators.
      {"+", OP_PLUS},
      {"-", OP_MINUS},
      {"*", OP_STAR},
      {"/", OP_SLASH},
      {"%", OP_PERCENT},
      {"&", OP_AMP},
      {"|", OP_PIPE},
      {"^", OP_CARET},
      {"<<", OP_LSHIFT},
      {">>", OP_RSHIFT},
      {"&^", OP_AMP_CARET},
      // Compound assignment.
      {"+=", OP_PLUS_EQ},
      {"-=", OP_MINUS_EQ},
      {"*=", OP_STAR_EQ},
      {"/=", OP_SLASH_EQ},
      {"%=", OP_PERCENT_EQ},
      {"&=", OP_AMP_EQ},
      {"|=", OP_PIPE_EQ},
      {"^=", OP_CARET_EQ},
      {"<<=", OP_LSHIFT_EQ},
      {">>=", OP_RSHIFT_EQ},
      {"&^=", OP_AMP_CARET_EQ},
      // Logical, channel, increment/decrement, comparison, assignment.
      {"&&", OP_AMP_AMP},
      {"||", OP_PIPE_PIPE},
      {"<-", OP_LT_MINUS},
      {"++", OP_PLUS_PLUS},
      {"--", OP_MINUS_MINUS},
      {"==", OP_EQ_EQ},
      {"<", OP_LT},
      {">", OP_GT},
      {"=", OP_EQ},
      {"!", OP_BANG},
      {"!=", OP_BANG_EQ},
      {"<=", OP_LT_EQ},
      {">=", OP_GT_EQ},
      {":=", OP_COLON_EQ},
      // Punctuation.
      {"...", OP_DOTS},
      {"(", OP_LPAREN},
      {"[", OP_LBRACK},
      {"{", OP_LBRACE},
      {",", OP_COMMA},
      {".", OP_DOT},
      {")", OP_RPAREN},
      {"]", OP_RBRACK},
      {"}", OP_RBRACE},
      {";", OP_SEMICOLON},
      {":", OP_COLON},
  };

  auto *table = new llvm::StringMap<TokenType>(128);
  for (const Entry &entry : kEntries)
    (*table)[entry.spelling] = entry.type;
  return table;
}
|