llvm-project/lldb/source/Plugins/ExpressionParser/Go/GoLexer.cpp

//===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include <string.h>

#include "GoLexer.h"

using namespace lldb_private;

// Shared table mapping keyword and operator spellings to their token types.
// It starts out null and is filled in on first use by LookupKeyword() or
// LookupToken(), both of which call InitKeywords().
llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords;

// Creates a lexer over the NUL-terminated text `src`. The text is not copied:
// m_src and m_end point directly into the caller's buffer (m_end is placed on
// the terminating NUL found by strlen). The "last token" starts out as
// TOK_INVALID with empty text.
GoLexer::GoLexer(const char *src)
    : m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "") {}

// Advances m_src past any whitespace and comments in front of the next token.
//
// Returns true if a newline was crossed while skipping. The caller uses this
// to decide whether a semicolon has to be inserted automatically. A block
// comment that spans several lines counts as a newline, as does the newline
// that terminates a line comment.
//
// On return m_src is either at m_end or on the first character of the next
// token. A '/' that does not open a terminated comment is left in place so
// that it can be lexed as an operator.
bool GoLexer::SkipWhitespace() {
  bool saw_newline = false;
  for (; m_src < m_end; ++m_src) {
    if (*m_src == '\n')
      saw_newline = true;
    if (*m_src == '/') {
      const char *comment_start = m_src;
      if (!SkipComment())
        return saw_newline;
      // SkipComment() leaves m_src on the last character of the comment, so
      // [comment_start, m_src) is the comment text. A newline inside it can
      // only come from a multi-line block comment.
      if (memchr(comment_start, '\n',
                 static_cast<size_t>(m_src - comment_start)) != nullptr)
        saw_newline = true;
      // The comment has been consumed; do NOT run the whitespace test on its
      // last character. The loop increment steps past it.
      continue;
    }
    if (!IsWhitespace(*m_src))
      return saw_newline;
  }
  return saw_newline;
}

// Tries to skip a comment that starts at m_src.
//
// Returns true if m_src pointed at a "//" line comment or at a terminated
// "/* ... */" block comment. In that case m_src is moved to the LAST character
// belonging to the comment, so the caller is expected to advance once more:
//   - line comment:  the character just before the terminating '\n', or the
//                    last character of the input if there is no newline;
//   - block comment: the closing '/'.
//
// Returns false, leaving m_src untouched, when there is no comment at m_src
// or when a block comment is never closed.
bool GoLexer::SkipComment() {
  // A comment opener needs two characters.
  if (m_end - m_src < 2 || m_src[0] != '/')
    return false;

  if (m_src[1] == '/') {
    for (const char *c = m_src + 2; c < m_end; ++c) {
      if (*c == '\n') {
        // Stop before the newline so the caller still sees it.
        m_src = c - 1;
        return true;
      }
    }
    // The comment runs to the end of the input: consume all of it.
    m_src = m_end - 1;
    return true;
  }

  if (m_src[1] == '*') {
    // c + 1 < m_end keeps the two-character look-ahead inside the input.
    for (const char *c = m_src + 2; c + 1 < m_end; ++c) {
      if (c[0] == '*' && c[1] == '/') {
        m_src = c + 1;
        return true;
      }
    }
  }
  return false;
}

// Produces the next token. Leading whitespace/comments are skipped first; the
// token's text is whatever InternalLex() consumed from the input (which is
// nothing for an automatically inserted semicolon or for end of input). The
// returned reference is to m_last_token, which is overwritten by every call.
const GoLexer::Token &GoLexer::Lex() {
  const bool crossed_newline = SkipWhitespace();
  const char *const token_begin = m_src;
  // InternalLex() still needs to see the previous token's type, so the new
  // type is stored only after it returns.
  const TokenType kind = InternalLex(crossed_newline);
  m_last_token.m_type = kind;
  m_last_token.m_value = llvm::StringRef(token_begin, m_src - token_begin);
  return m_last_token;
}

// Classifies the token starting at m_src and advances m_src past it.
//
// `newline` tells whether a newline was skipped since the previous token. If
// so, and the previous token is one of the kinds listed below, OP_SEMICOLON is
// returned without consuming any input. Otherwise the first character selects
// the specialised scanner (number, operator, identifier, string, rune). A
// character that starts none of these is consumed and reported as TOK_INVALID.
GoLexer::TokenType GoLexer::InternalLex(bool newline) {
  if (m_src >= m_end)
    return TOK_EOF;

  // Token kinds after which a newline turns into a semicolon.
  const auto ends_statement = [](TokenType prev) {
    switch (prev) {
    case TOK_IDENTIFIER:
    case LIT_FLOAT:
    case LIT_IMAGINARY:
    case LIT_INTEGER:
    case LIT_RUNE:
    case LIT_STRING:
    case KEYWORD_BREAK:
    case KEYWORD_CONTINUE:
    case KEYWORD_FALLTHROUGH:
    case KEYWORD_RETURN:
    case OP_PLUS_PLUS:
    case OP_MINUS_MINUS:
    case OP_RPAREN:
    case OP_RBRACK:
    case OP_RBRACE:
      return true;
    default:
      return false;
    }
  };
  if (newline && ends_statement(m_last_token.m_type))
    return OP_SEMICOLON;

  const char first = *m_src;

  if (first >= '0' && first <= '9')
    return DoNumber();

  // A '.' starts a number only when a digit follows (e.g. ".5").
  if (first == '.')
    return IsDecimal(m_src[1]) ? DoNumber() : DoOperator();

  if (first == '"' || first == '`')
    return DoString();

  if (first == '\'')
    return DoRune();

  // Every character that can begin an operator or punctuation token.
  if (first != '\0' && strchr("+-*/%&|^<>!:;()[]{},=", first) != nullptr)
    return DoOperator();

  // '$' is accepted as an identifier start for lldb persistent vars.
  if (first == '$' || IsLetterOrDigit(first))
    return DoIdent();

  ++m_src;
  return TOK_INVALID;
}

// Scans an operator/punctuation token using longest match: a three-character
// spelling is tried first, then two characters, then one. The one-character
// attempt always consumes a character, even if the lookup fails (in which
// case TOK_INVALID is returned).
GoLexer::TokenType GoLexer::DoOperator() {
  const auto remaining = m_end - m_src;
  for (int len = 3; len >= 2; --len) {
    if (remaining < len)
      continue;
    const TokenType match = LookupKeyword(llvm::StringRef(m_src, len));
    if (match != TOK_INVALID) {
      m_src += len;
      return match;
    }
  }
  const TokenType single = LookupKeyword(llvm::StringRef(m_src, 1));
  ++m_src;
  return single;
}

// Scans an identifier or keyword. The first character is accepted
// unconditionally (the caller has already vetted it); the token then extends
// over every following character for which IsLetterOrDigit() holds. If the
// resulting text is in the keyword table its token type is returned,
// otherwise the result is TOK_IDENTIFIER.
GoLexer::TokenType GoLexer::DoIdent() {
  const char *const first = m_src;
  for (++m_src; m_src < m_end && IsLetterOrDigit(*m_src); ++m_src) {
  }
  const TokenType keyword = LookupKeyword(llvm::StringRef(first, m_src - first));
  return keyword == TOK_INVALID ? TOK_IDENTIFIER : keyword;
}

// Scans a numeric literal starting at m_src.
//
//   - "0x"/"0X" followed by hex digits                  -> LIT_INTEGER
//   - digits ending in 'i'                              -> LIT_IMAGINARY
//   - digits with a '.' and/or an exponent ('e'/'E',
//     optionally signed)                                -> LIT_FLOAT
//   - plain digits                                      -> LIT_INTEGER
//
// At most one '.' and one exponent are accepted; a '.' is no longer accepted
// once an exponent has been seen. Scanning stops at the first character that
// does not fit, which is left unconsumed.
GoLexer::TokenType GoLexer::DoNumber() {
  const bool is_hex = m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X');
  if (is_hex) {
    for (m_src += 2; IsHexChar(*m_src); ++m_src) {
    }
    return LIT_INTEGER;
  }

  bool may_take_dot = true;
  bool may_take_exp = true;
  for (;;) {
    while (IsDecimal(*m_src))
      ++m_src;

    const char next = *m_src;
    if (next == 'i') {
      ++m_src;
      return LIT_IMAGINARY;
    }
    if (next == '.') {
      if (!may_take_dot)
        return LIT_FLOAT;
      may_take_dot = false;
      ++m_src;
      continue;
    }
    if (next == 'e' || next == 'E') {
      if (!may_take_exp)
        return LIT_FLOAT;
      may_take_dot = false;
      may_take_exp = false;
      ++m_src;
      // Optional sign of the exponent.
      if (*m_src == '+' || *m_src == '-')
        ++m_src;
      continue;
    }
    // may_take_dot is still set only if neither '.' nor an exponent was seen.
    return may_take_dot ? LIT_INTEGER : LIT_FLOAT;
  }
}

// Scans a rune literal; m_src points at the opening quote on entry.
//
// Returns LIT_RUNE with m_src just past the closing quote. Returns
// TOK_INVALID if the literal is not closed before a newline or before the end
// of the input; on a newline m_src is left where scanning stopped, and at end
// of input m_src is left at m_end. A backslash makes the following character
// part of the literal, so an escaped quote does not terminate it.
GoLexer::TokenType GoLexer::DoRune() {
  while (++m_src < m_end) {
    switch (*m_src) {
    case '\'':
      ++m_src;
      return LIT_RUNE;
    case '\n':
      return TOK_INVALID;
    case '\\':
      // A backslash that is the very last character has nothing to escape.
      // Stop exactly at m_end; skipping "the escaped character" here would
      // push m_src one past m_end and pull the terminating NUL into the
      // token text.
      if (m_src + 1 >= m_end) {
        m_src = m_end;
        return TOK_INVALID;
      }
      if (m_src[1] == '\n')
        return TOK_INVALID;
      // Skip the escaped character so it is not interpreted.
      ++m_src;
      break;
    default:
      break;
    }
  }
  return TOK_INVALID;
}

// Scans a string literal; m_src points at the opening '"' or '`' on entry.
//
// Raw strings (back-quoted) run to the next back quote and may contain
// anything, including newlines and backslashes. Interpreted strings
// (double-quoted) end at the next unescaped '"' and may not contain a newline.
//
// Returns LIT_STRING with m_src just past the closing quote, or TOK_INVALID
// if the literal is not properly closed. At end of input m_src is left at
// m_end.
GoLexer::TokenType GoLexer::DoString() {
  if (*m_src == '`') {
    while (++m_src < m_end) {
      if (*m_src == '`') {
        ++m_src;
        return LIT_STRING;
      }
    }
    return TOK_INVALID;
  }
  while (++m_src < m_end) {
    switch (*m_src) {
    case '"':
      ++m_src;
      return LIT_STRING;
    case '\n':
      return TOK_INVALID;
    case '\\':
      // A backslash that is the very last character has nothing to escape.
      // Stop exactly at m_end; skipping "the escaped character" here would
      // push m_src one past m_end and pull the terminating NUL into the
      // token text.
      if (m_src + 1 >= m_end) {
        m_src = m_end;
        return TOK_INVALID;
      }
      if (m_src[1] == '\n')
        return TOK_INVALID;
      // Skip the escaped character so that \" does not end the string.
      ++m_src;
      break;
    default:
      break;
    }
  }
  return TOK_INVALID;
}

// Maps a spelling (keyword or operator text) to its token type. Returns
// TOK_INVALID if `id` is not in the table. The table is built on first use.
GoLexer::TokenType GoLexer::LookupKeyword(llvm::StringRef id) {
  if (!m_keywords)
    m_keywords = InitKeywords();
  auto entry = m_keywords->find(id);
  return entry != m_keywords->end() ? entry->second : TOK_INVALID;
}

// Reverse of LookupKeyword(): returns the spelling registered for token type
// `t`, or an empty string if no table entry has that type. This is a linear
// scan over the table, which is built on first use.
llvm::StringRef GoLexer::LookupToken(TokenType t) {
  if (!m_keywords)
    m_keywords = InitKeywords();
  for (auto it = m_keywords->begin(), last = m_keywords->end(); it != last;
       ++it) {
    if (it->getValue() == t)
      return it->getKey();
  }
  return "";
}

// Allocates and fills the spelling -> token type table used by
// LookupKeyword() and LookupToken(). It holds the Go keywords followed by the
// operators and punctuation. The caller receives a heap-allocated map.
llvm::StringMap<GoLexer::TokenType> *GoLexer::InitKeywords() {
  static const struct {
    const char *spelling;
    TokenType type;
  } kEntries[] = {
      // Keywords.
      {"break", KEYWORD_BREAK},
      {"default", KEYWORD_DEFAULT},
      {"func", KEYWORD_FUNC},
      {"interface", KEYWORD_INTERFACE},
      {"select", KEYWORD_SELECT},
      {"case", KEYWORD_CASE},
      {"defer", KEYWORD_DEFER},
      {"go", KEYWORD_GO},
      {"map", KEYWORD_MAP},
      {"struct", KEYWORD_STRUCT},
      {"chan", KEYWORD_CHAN},
      {"else", KEYWORD_ELSE},
      {"goto", KEYWORD_GOTO},
      {"package", KEYWORD_PACKAGE},
      {"switch", KEYWORD_SWITCH},
      {"const", KEYWORD_CONST},
      {"fallthrough", KEYWORD_FALLTHROUGH},
      {"if", KEYWORD_IF},
      {"range", KEYWORD_RANGE},
      {"type", KEYWORD_TYPE},
      {"continue", KEYWORD_CONTINUE},
      {"for", KEYWORD_FOR},
      {"import", KEYWORD_IMPORT},
      {"return", KEYWORD_RETURN},
      {"var", KEYWORD_VAR},
      // Arithmetic and bitwise operators.
      {"+", OP_PLUS},
      {"-", OP_MINUS},
      {"*", OP_STAR},
      {"/", OP_SLASH},
      {"%", OP_PERCENT},
      {"&", OP_AMP},
      {"|", OP_PIPE},
      {"^", OP_CARET},
      {"<<", OP_LSHIFT},
      {">>", OP_RSHIFT},
      {"&^", OP_AMP_CARET},
      // Compound assignment.
      {"+=", OP_PLUS_EQ},
      {"-=", OP_MINUS_EQ},
      {"*=", OP_STAR_EQ},
      {"/=", OP_SLASH_EQ},
      {"%=", OP_PERCENT_EQ},
      {"&=", OP_AMP_EQ},
      {"|=", OP_PIPE_EQ},
      {"^=", OP_CARET_EQ},
      {"<<=", OP_LSHIFT_EQ},
      {">>=", OP_RSHIFT_EQ},
      {"&^=", OP_AMP_CARET_EQ},
      // Logical, channel, increment/decrement, comparison, assignment.
      {"&&", OP_AMP_AMP},
      {"||", OP_PIPE_PIPE},
      {"<-", OP_LT_MINUS},
      {"++", OP_PLUS_PLUS},
      {"--", OP_MINUS_MINUS},
      {"==", OP_EQ_EQ},
      {"<", OP_LT},
      {">", OP_GT},
      {"=", OP_EQ},
      {"!", OP_BANG},
      {"!=", OP_BANG_EQ},
      {"<=", OP_LT_EQ},
      {">=", OP_GT_EQ},
      {":=", OP_COLON_EQ},
      // Punctuation.
      {"...", OP_DOTS},
      {"(", OP_LPAREN},
      {"[", OP_LBRACK},
      {"{", OP_LBRACE},
      {",", OP_COMMA},
      {".", OP_DOT},
      {")", OP_RPAREN},
      {"]", OP_RBRACK},
      {"}", OP_RBRACE},
      {";", OP_SEMICOLON},
      {":", OP_COLON},
  };

  auto *table = new llvm::StringMap<TokenType>(128);
  for (const auto &entry : kEntries)
    (*table)[entry.spelling] = entry.type;
  return table;
}