llvm-project/flang/lib/Parser/token-sequence.cpp

371 lines
10 KiB
C++

//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "token-sequence.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/Support/raw_ostream.h"
namespace Fortran::parser {
// Move assignment.  This sequence is emptied first, so that after the
// swap "that" is left holding the cleared state rather than the old contents.
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  this->clear();
  this->swap(that);
  return *this;
}
// Resets the sequence to empty: no characters, no token starts, no
// provenance mappings, and the next token begins at offset zero.
void TokenSequence::clear() {
  char_.clear();
  provenances_.clear();
  start_.clear();
  nextStart_ = 0;
}
// Removes the last closed token: its start offset, its characters, and the
// matching number of bytes of provenance.  The sequence must hold at least
// one token and that token must be non-empty.
void TokenSequence::pop_back() {
  CHECK(!start_.empty());
  CHECK(nextStart_ > start_.back());
  const std::size_t lastTokenStart{start_.back()};
  const std::size_t removedBytes{nextStart_ - lastTokenStart};
  start_.pop_back();
  // The next token will now begin where the removed one used to start.
  nextStart_ = lastTokenStart;
  char_.resize(lastTokenStart);
  provenances_.RemoveLastBytes(removedBytes);
}
// Asks each underlying container to release any excess capacity.
void TokenSequence::shrink_to_fit() {
  char_.shrink_to_fit();
  start_.shrink_to_fit();
  provenances_.shrink_to_fit();
}
// Exchanges the complete state of this sequence with that of "that",
// member by member.
void TokenSequence::swap(TokenSequence &that) {
  std::swap(nextStart_, that.nextStart_);
  start_.swap(that.start_);
  char_.swap(that.char_);
  provenances_.swap(that.provenances_);
}
// Returns the index of the first non-blank token at or after "at".
// When there is none (including when "at" is already past the end),
// the result is the number of tokens.
std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
  const std::size_t count{start_.size()};
  while (at < count && TokenAt(at).IsBlank()) {
    ++at;
  }
  return at < count ? at : count;
}
// C-style /*comments*/ are removed from preprocessing directive
// token sequences by the prescanner, but not C++ or Fortran
// free-form line-ending comments (//... and !...) because
// ignoring them is directive-specific.
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
std::size_t tokens{start_.size()};
for (; at < tokens; ++at) {
auto tok{TokenAt(at)};
const char *end{tok.end()};
for (const char *p{tok.begin()}; p < end; ++p) {
switch (*p) {
case '/':
return p + 1 >= end || p[1] != '/';
case '!':
return false;
case ' ':
break;
default:
return true;
}
}
}
return false;
}
// Appends every token of "that", together with its provenance mappings,
// to this sequence.  If a token is currently open here (characters were
// added after the last recorded start), it is closed first by recording
// its start offset.
void TokenSequence::Put(const TokenSequence &that) {
  if (nextStart_ < char_.size()) {
    start_.push_back(nextStart_);
  }
  // Token starts are byte offsets into char_; keep them in std::size_t so
  // that they are never narrowed through int on the way.
  const std::size_t offset{char_.size()};
  for (std::size_t st : that.start_) {
    start_.push_back(st + offset);
  }
  char_.insert(char_.end(), that.char_.begin(), that.char_.end());
  nextStart_ = char_.size();
  provenances_.Put(that.provenances_);
}
void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
std::size_t offset{0};
std::size_t tokens{that.SizeInTokens()};
for (std::size_t j{0}; j < tokens; ++j) {
CharBlock tok{that.TokenAt(j)};
Put(tok, range.OffsetMember(offset));
offset += tok.size();
}
CHECK(offset == range.size());
}
void TokenSequence::Put(
const TokenSequence &that, std::size_t at, std::size_t tokens) {
ProvenanceRange provenance;
std::size_t offset{0};
for (; tokens-- > 0; ++at) {
CharBlock tok{that.TokenAt(at)};
std::size_t tokBytes{tok.size()};
for (std::size_t j{0}; j < tokBytes; ++j) {
if (offset == provenance.size()) {
provenance = that.provenances_.Map(that.start_[at] + j);
offset = 0;
}
PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
}
CloseToken();
}
}
// Appends the "bytes" characters at "s" as a single token.  The character
// at index k receives the provenance "provenance + k".
void TokenSequence::Put(
    const char *s, std::size_t bytes, Provenance provenance) {
  std::size_t index{0};
  while (index < bytes) {
    PutNextTokenChar(s[index], provenance + index);
    ++index;
  }
  CloseToken();
}
void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
Put(&t[0], t.size(), provenance);
}
// Appends the contents of "s" as a single token whose first character
// has the given provenance.
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  const char *text{s.data()};
  const std::size_t length{s.size()};
  Put(text, length, provenance);
}
// Appends the string accumulated in "ss" as a single token whose first
// character has the given provenance.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  const std::string &text{ss.str()};
  Put(text, provenance);
}
// Converts the characters of the sequence to lower case in place, one token
// at a time, via ToLowerCaseLetter().  Tokens that look like literals are
// only partly converted (or left alone) so that their contents survive:
//  - a token that starts with decimal digits is left unchanged if it is all
//    digits or if the digits are followed by '_'; if they are followed by
//    'h' or 'H', only that letter is set to 'h'; otherwise everything after
//    the digits is converted;
//  - a token that ends with a quote or apostrophe is left unchanged if it
//    also begins with that character, is converted entirely if that
//    character is its second character, and otherwise has only its prefix
//    (up to the first occurrence of that character) converted;
//  - any other token is converted entirely.
// Returns *this so that calls can be chained.
// NOTE(review): the uses of *p and limit[-1] below appear to assume that
// every token holds at least one character -- confirm.
TokenSequence &TokenSequence::ToLowerCase() {
std::size_t tokens{start_.size()};
std::size_t chars{char_.size()};
std::size_t atToken{0};
for (std::size_t j{0}; j < chars;) {
// The current token runs from offset j up to the start of the next token,
// or to the end of the characters when it is the last one.
std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
char *p{&char_[j]};
char const *limit{char_.data() + nextStart};
// Advance j now; the rest of the body works only through p and limit.
j = nextStart;
if (IsDecimalDigit(*p)) {
// Skip over the leading run of decimal digits.
while (p < limit && IsDecimalDigit(*p)) {
++p;
}
if (p >= limit) {
// The token consists solely of digits; there is nothing to convert.
} else if (*p == 'h' || *p == 'H') {
// Hollerith
// Only the 'h' itself is normalized; the characters after it are kept.
*p = 'h';
} else if (*p == '_') {
// kind-prefixed character literal (e.g., 1_"ABC")
// Left untouched.
} else {
// exponent
// Everything after the digits is converted.
for (; p < limit; ++p) {
*p = ToLowerCaseLetter(*p);
}
}
} else if (limit[-1] == '\'' || limit[-1] == '"') {
// The token ends with a quote or apostrophe.
if (*p == limit[-1]) {
// Character literal without prefix
// Left untouched.
} else if (p[1] == limit[-1]) {
// BOZX-prefixed constant
// A single prefix character precedes the opening quote; the whole token
// is converted.
for (; p < limit; ++p) {
*p = ToLowerCaseLetter(*p);
}
} else {
// Literal with kind-param prefix name (e.g., K_"ABC").
// Convert only the prefix, stopping at the first occurrence of the
// closing quote character.
for (; *p != limit[-1]; ++p) {
*p = ToLowerCaseLetter(*p);
}
}
} else {
// Any other token is converted in its entirety.
for (; p < limit; ++p) {
*p = ToLowerCaseLetter(*p);
}
}
}
return *this;
}
// Reports whether any blank token begins at character offset "firstChar"
// or later.
bool TokenSequence::HasBlanks(std::size_t firstChar) const {
  const std::size_t count{SizeInTokens()};
  std::size_t index{0};
  while (index < count) {
    if (start_[index] >= firstChar && TokenAt(index).IsBlank()) {
      return true;
    }
    ++index;
  }
  return false;
}
// Reports whether some blank token that begins at character offset
// "firstChar" or later immediately follows another blank token.
bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
  bool previousBlank{false};
  const std::size_t count{SizeInTokens()};
  for (std::size_t index{0}; index != count; ++index) {
    const bool currentBlank{TokenAt(index).IsBlank()};
    if (currentBlank) {
      if (previousBlank && start_[index] >= firstChar) {
        return true;
      }
    }
    previousBlank = currentBlank;
  }
  return false;
}
// Rebuilds the sequence without the blank tokens that begin at character
// offset "firstChar" or later; all other tokens are kept in order.
// Returns *this so that calls can be chained.
TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
  TokenSequence kept;
  const std::size_t count{SizeInTokens()};
  for (std::size_t index{0}; index != count; ++index) {
    const bool removable{
        TokenAt(index).IsBlank() && start_[index] >= firstChar};
    if (!removable) {
      kept.Put(*this, index);
    }
  }
  swap(kept);
  return *this;
}
// Rebuilds the sequence without any blank token that directly follows
// another blank token and begins at character offset "firstChar" or later.
// Whether a token "follows a blank" is judged against the original
// sequence, so a run of blanks collapses to its first member.
// Returns *this so that calls can be chained.
TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
  TokenSequence kept;
  bool previousBlank{false};
  const std::size_t count{SizeInTokens()};
  for (std::size_t index{0}; index != count; ++index) {
    const bool currentBlank{TokenAt(index).IsBlank()};
    const bool redundant{
        currentBlank && previousBlank && start_[index] >= firstChar};
    if (!redundant) {
      kept.Put(*this, index);
    }
    previousBlank = currentBlank;
  }
  swap(kept);
  return *this;
}
// Truncates the sequence at the first token whose first non-blank character
// is '!'.  When "skipFirst" is true, the first such token is passed over and
// the truncation happens at the second one, if any.  The sequence is left
// unchanged when no truncation point is found.
// Returns *this so that calls can be chained.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  const std::size_t count{SizeInTokens()};
  for (std::size_t index{0}; index != count; ++index) {
    if (TokenAt(index).FirstNonBlank() != '!') {
      continue;
    }
    if (skipFirst) {
      skipFirst = false;
      continue;
    }
    TokenSequence clipped;
    if (index > 0) {
      // NOTE(review): the last argument is a token count, so this keeps
      // tokens [0, index - 1) and also drops the token just before the
      // '!' token -- confirm that this is intended.
      clipped.Put(*this, 0, index - 1);
    }
    swap(clipped);
    return *this;
  }
  return *this;
}
// Appends this sequence's characters and then its provenance mappings
// to "cooked".
void TokenSequence::Emit(CookedSource &cooked) const {
  const char *text{&char_[0]};
  const std::size_t length{char_.size()};
  cooked.Put(text, length);
  cooked.PutProvenanceMappings(provenances_);
}
// Writes a debugging description to "o": a summary line, then one line per
// token giving its index, its starting offset, and its text.
// Returns "o".
llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
  o << "TokenSequence has " << char_.size();
  o << " chars; nextStart_ " << nextStart_ << '\n';
  const std::size_t count{start_.size()};
  for (std::size_t index{0}; index != count; ++index) {
    o << '[' << index << "] @ " << start_[index];
    o << " '" << TokenAt(index).ToString() << "'\n";
  }
  return o;
}
// Returns the provenance of the character at "offset" in the sequence,
// i.e. the start of the provenance range that the offset maps to.
Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
  return provenances_.Map(offset).start();
}
// Returns the provenance of the character that lies "offset" characters
// into the given token.
Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  const std::size_t charOffset{start_[token] + offset};
  return GetCharProvenance(charOffset);
}
// Returns the provenance range that the character "offset" characters into
// the given token maps to, cut down with Prefix() to the number of bytes
// from that character to the end of the token.
ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange mapped{provenances_.Map(start_[token] + offset)};
  const auto remaining{TokenBytes(token) - offset};
  return mapped.Prefix(remaining);
}
// Returns a provenance range for "tokens" consecutive tokens starting at
// "token".  It begins with the range that the first token's start maps to,
// then annexes the range of each following token via AnnexIfPredecessor(),
// stopping at the first one that is rejected.
// A count of zero yields a default-constructed range.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange combined{provenances_.Map(start_[token])};
  for (std::size_t remaining{tokens - 1}; remaining > 0; --remaining) {
    ++token;
    if (!combined.AnnexIfPredecessor(provenances_.Map(start_[token]))) {
      break;
    }
  }
  return combined;
}
// Returns the interval provenance range computed over all of the tokens
// in the sequence.
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  const std::size_t allTokens{start_.size()};
  return GetIntervalProvenanceRange(0, allTokens);
}
// Emits an error message for each token whose first non-blank character is
// not accepted by IsValidFortranTokenCharacter().  A '!' at the head of the
// very first token is tolerated.  Characters outside the printable ASCII
// range are reported in hexadecimal; the others are reported literally.
// Returns *this so that calls can be chained.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages) const {
  const std::size_t count{SizeInTokens()};
  for (std::size_t index{0}; index != count; ++index) {
    CharBlock token{TokenAt(index)};
    char ch{token.FirstNonBlank()};
    if (ch == ' ' || IsValidFortranTokenCharacter(ch)) {
      continue;
    }
    if (ch == '!' && index == 0) {
      continue; // allow in !dir$
    }
    if (ch < ' ' || ch >= '\x7f') {
      messages.Say(GetTokenProvenanceRange(index),
          "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
    } else {
      messages.Say(GetTokenProvenanceRange(index),
          "bad character ('%c') in Fortran token"_err_en_US, ch);
    }
  }
  return *this;
}
// Emits an error message for the first unmatched ')' in the sequence or,
// when there is none, for the innermost '(' that is never closed.
// A token is treated as a parenthesis when its first non-blank character
// is '(' or ')'.
// Returns *this so that calls can be chained.
const TokenSequence &TokenSequence::CheckBadParentheses(
    Messages &messages) const {
  // First, a quick pass with no allocation for the common case
  int nesting{0};
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch == '(') {
      ++nesting;
    } else if (ch == ')') {
      // A ')' with nothing open is an error even when the overall counts
      // balance (e.g. ")("), so stop here with nesting left at -1 rather
      // than letting a later '(' cancel it out.
      if (nesting-- == 0) {
        break;
      }
    }
  }
  if (nesting != 0) {
    // There's an error; diagnose it
    std::vector<std::size_t> stack;
    for (std::size_t j{0}; j < tokens; ++j) {
      CharBlock token{TokenAt(j)};
      char ch{token.FirstNonBlank()};
      if (ch == '(') {
        stack.push_back(j);
      } else if (ch == ')') {
        if (stack.empty()) {
          messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
          return *this;
        }
        stack.pop_back();
      }
    }
    // No unmatched ')' was found, so the imbalance must be an excess of
    // '(' and the stack cannot be empty.
    CHECK(!stack.empty());
    messages.Say(
        GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
  }
  return *this;
}
} // namespace Fortran::parser