2020-02-25 23:11:52 +08:00
|
|
|
//===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
|
2018-05-02 03:50:34 +08:00
|
|
|
//
|
2019-12-21 04:52:07 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2018-05-02 03:50:34 +08:00
|
|
|
//
|
2020-01-11 04:12:03 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2018-05-02 03:50:34 +08:00
|
|
|
|
2018-02-17 03:42:17 +08:00
|
|
|
#ifndef FORTRAN_PARSER_PRESCAN_H_
|
|
|
|
#define FORTRAN_PARSER_PRESCAN_H_
|
2018-01-31 03:54:47 +08:00
|
|
|
|
|
|
|
// Defines a fast Fortran source prescanning phase that implements some
|
|
|
|
// character-level features of the language that can be inefficient to
|
|
|
|
// support directly in a backtracking parser. This phase handles Fortran
|
|
|
|
// line continuation, comment removal, card image margins, padding out
|
2018-02-14 04:24:54 +08:00
|
|
|
// fixed form character literals on truncated card images, file
|
|
|
|
// inclusion, and driving the Fortran source preprocessor.
|
2018-01-31 03:54:47 +08:00
|
|
|
|
2018-02-14 04:50:47 +08:00
|
|
|
#include "token-sequence.h"
|
2020-02-25 23:11:52 +08:00
|
|
|
#include "flang/Common/Fortran-features.h"
|
|
|
|
#include "flang/Parser/characters.h"
|
|
|
|
#include "flang/Parser/message.h"
|
|
|
|
#include "flang/Parser/provenance.h"
|
2018-03-21 01:59:07 +08:00
|
|
|
#include <bitset>
|
2018-01-31 03:54:47 +08:00
|
|
|
#include <optional>
|
2018-02-10 06:04:11 +08:00
|
|
|
#include <string>
|
2018-03-21 01:59:07 +08:00
|
|
|
#include <unordered_set>
|
2018-01-31 03:54:47 +08:00
|
|
|
|
2018-05-03 04:48:12 +08:00
|
|
|
namespace Fortran::parser {
|
2018-01-31 03:54:47 +08:00
|
|
|
|
2018-02-14 04:50:47 +08:00
|
|
|
class Messages;
|
|
|
|
class Preprocessor;
|
|
|
|
|
2018-01-31 03:54:47 +08:00
|
|
|
class Prescanner {
|
2018-02-06 04:54:36 +08:00
|
|
|
public:
|
2020-09-18 03:19:42 +08:00
|
|
|
Prescanner(Messages &, CookedSource &, Preprocessor &,
|
2019-11-07 03:15:03 +08:00
|
|
|
common::LanguageFeatureControl);
|
2018-02-14 06:22:08 +08:00
|
|
|
Prescanner(const Prescanner &);
|
2018-02-02 07:01:23 +08:00
|
|
|
|
2020-09-18 03:19:42 +08:00
|
|
|
// Accessors (read-only and mutable) for the AllSources object that this
// prescanner holds a reference to.
const AllSources &allSources() const {
  return allSources_;
}
AllSources &allSources() {
  return allSources_;
}
|
|
|
|
// Accessors (read-only and mutable) for the Messages object that this
// prescanner holds a reference to.
const Messages &messages() const {
  return messages_;
}
Messages &messages() {
  return messages_;
}
|
|
|
|
// Accessors (read-only and mutable) for the Preprocessor object that this
// prescanner holds a reference to.
const Preprocessor &preprocessor() const {
  return preprocessor_;
}
Preprocessor &preprocessor() {
  return preprocessor_;
}
|
2018-01-31 03:54:47 +08:00
|
|
|
|
|
|
|
// Records in inFixedForm_ whether the source is to be treated as fixed
// form.  Returns *this so that setter calls can be chained.
Prescanner &set_fixedForm(bool yes) {
  inFixedForm_ = yes;
  return *this;
}
|
2018-03-01 08:56:10 +08:00
|
|
|
// Records the character encoding to use in encoding_.  Returns *this so
// that setter calls can be chained.
Prescanner &set_encoding(Encoding code) {
  encoding_ = code;
  return *this;
}
|
2018-01-31 03:54:47 +08:00
|
|
|
// Overrides the fixed form column limit (fixedFormColumnLimit_, which
// otherwise keeps its in-class default).  Returns *this so that setter
// calls can be chained.
Prescanner &set_fixedFormColumnLimit(int limit) {
  fixedFormColumnLimit_ = limit;
  return *this;
}
|
|
|
|
|
2018-03-21 01:59:07 +08:00
|
|
|
Prescanner &AddCompilerDirectiveSentinel(const std::string &);
|
|
|
|
|
2018-04-03 06:51:04 +08:00
|
|
|
void Prescan(ProvenanceRange);
|
2018-03-24 06:14:52 +08:00
|
|
|
void Statement();
|
2018-03-17 07:58:35 +08:00
|
|
|
void NextLine();
|
2018-02-14 06:22:08 +08:00
|
|
|
|
|
|
|
// Callbacks for use by Preprocessor.
|
2019-06-12 03:06:18 +08:00
|
|
|
// True when there is no further line to process, i.e. nextLine_ has
// reached limit_ (the first address after the end of the current source).
bool IsAtEnd() const {
  return limit_ <= nextLine_;
}
|
2018-03-17 07:58:35 +08:00
|
|
|
bool IsNextLinePreprocessorDirective() const;
|
|
|
|
TokenSequence TokenizePreprocessorDirective();
|
2018-02-10 06:04:11 +08:00
|
|
|
// Provenance of the character at at_, the next character to process.
Provenance GetCurrentProvenance() const {
  return GetProvenance(at_);
}
|
2018-03-20 02:48:49 +08:00
|
|
|
|
2020-08-07 07:56:14 +08:00
|
|
|
// Perfectly forwards every argument to messages_.Say() and returns the
// Message reference produced by that call.
template <typename... Args> Message &Say(Args &&...args) {
  return messages_.Say(std::forward<Args>(args)...);
}
|
2018-01-31 03:54:47 +08:00
|
|
|
|
2018-02-06 04:54:36 +08:00
|
|
|
private:
|
2018-03-24 06:14:52 +08:00
|
|
|
// Result of classifying one source line: what kind of line it is, where
// its content starts, and (for compiler directives) the sentinel involved.
struct LineClassification {
  enum class Kind {
    Comment,
    ConditionalCompilationDirective,
    IncludeDirective, // #include
    DefinitionDirective, // #define & #undef
    PreprocessorDirective,
    IncludeLine, // Fortran INCLUDE
    CompilerDirective,
    Source
  };

  // Deliberately not explicit, so a classification can be built from a
  // bare Kind; the offset and sentinel default to 0 and nullptr.
  LineClassification(Kind lineKind, std::size_t offset = 0,
      const char *sentinelText = nullptr)
      : kind{lineKind}, payloadOffset{offset}, sentinel{sentinelText} {}

  // Declaring the move constructor suppresses the implicit copy operations.
  LineClassification(LineClassification &&) = default;

  Kind kind;
  std::size_t payloadOffset; // byte offset of content
  const char *sentinel; // if it's a compiler directive
};
|
|
|
|
|
2018-02-02 04:08:02 +08:00
|
|
|
void BeginSourceLine(const char *at) {
|
2018-01-31 03:54:47 +08:00
|
|
|
at_ = at;
|
2018-02-13 03:56:42 +08:00
|
|
|
column_ = 1;
|
2018-01-31 03:54:47 +08:00
|
|
|
tabInCurrentLine_ = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void BeginSourceLineAndAdvance() {
|
2019-06-12 03:06:18 +08:00
|
|
|
BeginSourceLine(nextLine_);
|
2018-01-31 03:54:47 +08:00
|
|
|
NextLine();
|
|
|
|
}
|
|
|
|
|
2020-08-08 07:44:08 +08:00
|
|
|
// Begins a new statement: starts on the next source line via
// BeginSourceLineAndAdvance(), then clears the state that is tracked per
// statement (slash seen, Hollerith suppression, delimiter nesting depth).
void BeginStatementAndAdvance() {
  BeginSourceLineAndAdvance();
  delimiterNesting_ = 0;
  preventHollerith_ = false;
  slashInCurrentStatement_ = false;
}
|
|
|
|
|
2018-02-10 06:04:11 +08:00
|
|
|
// Maps a pointer into the current source content to its Provenance by
// adding the pointer's distance from start_ to startProvenance_.
Provenance GetProvenance(const char *sourceChar) const {
  const auto offsetFromStart{sourceChar - start_};
  return startProvenance_ + offsetFromStart;
}
|
|
|
|
|
2018-04-24 03:17:11 +08:00
|
|
|
// Builds the ProvenanceRange covering the half-open span of source
// characters [first, afterLast): it starts at the provenance of `first`
// and is (afterLast - first) bytes long.
ProvenanceRange GetProvenanceRange(
    const char *first, const char *afterLast) const {
  std::size_t length = afterLast - first;
  return {GetProvenance(first), length};
}
|
|
|
|
|
2018-05-11 04:48:55 +08:00
|
|
|
// Appends `ch` to the token under construction in `tokens`, tagged with
// the provenance of the current scan position (at_).
void EmitChar(TokenSequence &tokens, char ch) {
  const Provenance here{GetCurrentProvenance()};
  tokens.PutNextTokenChar(ch, here);
}
|
|
|
|
|
2018-05-11 04:48:55 +08:00
|
|
|
// Appends `ch` to the token under construction in `tokens`.  Unlike
// EmitChar(), the character is tagged with the compiler-insertion
// provenance that allSources_ supplies for `ch`, not a source position.
void EmitInsertedChar(TokenSequence &tokens, char ch) {
  tokens.PutNextTokenChar(
      ch, Provenance{allSources_.CompilerInsertionProvenance(ch)});
}
|
|
|
|
|
2018-05-11 04:48:55 +08:00
|
|
|
// Emits `ch` into `tokens` with the current provenance, advances the scan
// position with NextChar(), and returns the character now found at at_.
char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
  EmitChar(tokens, ch);
  NextChar();
  const char following{*at_};
  return following;
}
|
|
|
|
|
2018-05-11 04:48:55 +08:00
|
|
|
// True while a compiler directive sentinel is recorded in
// directiveSentinel_.
bool InCompilerDirective() const {
  return directiveSentinel_ != nullptr;
}
|
|
|
|
bool InFixedFormSource() const {
|
|
|
|
return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
|
|
|
|
}
|
|
|
|
|
2019-03-16 04:57:41 +08:00
|
|
|
bool IsCComment(const char *p) const {
|
|
|
|
return p[0] == '/' && p[1] == '*' &&
|
|
|
|
(inPreprocessorDirective_ ||
|
|
|
|
(!inCharLiteral_ &&
|
2019-11-07 03:15:03 +08:00
|
|
|
features_.IsEnabled(
|
|
|
|
common::LanguageFeature::ClassicCComments)));
|
2019-03-16 04:57:41 +08:00
|
|
|
}
|
|
|
|
|
2020-08-26 00:39:09 +08:00
|
|
|
void LabelField(TokenSequence &);
|
2018-05-04 02:09:04 +08:00
|
|
|
void SkipToEndOfLine();
|
2019-06-12 03:06:18 +08:00
|
|
|
bool MustSkipToEndOfLine() const;
|
2018-01-31 03:54:47 +08:00
|
|
|
void NextChar();
|
2020-06-27 03:58:05 +08:00
|
|
|
void SkipToNextSignificantCharacter();
|
2019-03-16 04:57:41 +08:00
|
|
|
void SkipCComments();
|
2018-01-31 03:54:47 +08:00
|
|
|
void SkipSpaces();
|
2018-07-31 03:56:58 +08:00
|
|
|
static const char *SkipWhiteSpace(const char *);
|
2019-03-16 04:57:41 +08:00
|
|
|
const char *SkipWhiteSpaceAndCComments(const char *) const;
|
|
|
|
const char *SkipCComment(const char *) const;
|
2018-05-11 04:48:55 +08:00
|
|
|
bool NextToken(TokenSequence &);
|
|
|
|
bool ExponentAndKind(TokenSequence &);
|
2019-06-19 04:46:54 +08:00
|
|
|
void QuotedCharacterLiteral(TokenSequence &, const char *start);
|
2018-08-04 05:00:36 +08:00
|
|
|
void Hollerith(TokenSequence &, int count, const char *start);
|
2018-05-11 04:48:55 +08:00
|
|
|
bool PadOutCharacterLiteral(TokenSequence &);
|
2019-03-15 03:19:26 +08:00
|
|
|
bool SkipCommentLine(bool afterAmpersand);
|
2018-03-21 01:59:07 +08:00
|
|
|
bool IsFixedFormCommentLine(const char *) const;
|
2019-03-16 04:57:41 +08:00
|
|
|
const char *IsFreeFormComment(const char *) const;
|
2018-03-24 06:14:52 +08:00
|
|
|
std::optional<std::size_t> IsIncludeLine(const char *) const;
|
2018-04-04 23:06:15 +08:00
|
|
|
void FortranInclude(const char *quote);
|
2018-05-03 07:09:12 +08:00
|
|
|
const char *IsPreprocessorDirectiveLine(const char *) const;
|
2018-05-11 04:48:55 +08:00
|
|
|
const char *FixedFormContinuationLine(bool mightNeedSpace);
|
2018-05-10 06:32:22 +08:00
|
|
|
const char *FreeFormContinuationLine(bool ampersand);
|
2020-07-22 08:57:06 +08:00
|
|
|
bool IsImplicitContinuation() const;
|
2018-05-11 04:48:55 +08:00
|
|
|
bool FixedFormContinuation(bool mightNeedSpace);
|
2018-01-31 03:54:47 +08:00
|
|
|
bool FreeFormContinuation();
|
2019-06-12 03:50:27 +08:00
|
|
|
bool Continuation(bool mightNeedFixedFormSpace);
|
2018-03-24 06:14:52 +08:00
|
|
|
std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
|
|
|
|
const char *) const;
|
|
|
|
std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
|
|
|
|
const char *) const;
|
|
|
|
const char *IsCompilerDirectiveSentinel(const char *) const;
|
|
|
|
LineClassification ClassifyLine(const char *) const;
|
|
|
|
void SourceFormChange(std::string &&);
|
|
|
|
|
|
|
|
Messages &messages_;
|
|
|
|
CookedSource &cooked_;
|
|
|
|
Preprocessor &preprocessor_;
|
2020-09-18 03:19:42 +08:00
|
|
|
AllSources &allSources_;
|
2019-11-07 03:15:03 +08:00
|
|
|
common::LanguageFeatureControl features_;
|
2018-03-24 06:14:52 +08:00
|
|
|
bool inFixedForm_{false};
|
|
|
|
int fixedFormColumnLimit_{72};
|
2019-06-12 01:34:58 +08:00
|
|
|
Encoding encoding_{Encoding::UTF_8};
|
2018-03-24 06:14:52 +08:00
|
|
|
int delimiterNesting_{0};
|
2018-04-07 05:20:29 +08:00
|
|
|
int prescannerNesting_{0};
|
2018-02-10 06:04:11 +08:00
|
|
|
|
2018-02-16 02:42:36 +08:00
|
|
|
Provenance startProvenance_;
|
2020-03-29 12:00:16 +08:00
|
|
|
const char *start_{nullptr}; // beginning of current source file content
|
|
|
|
const char *limit_{nullptr}; // first address after end of current source
|
|
|
|
const char *nextLine_{nullptr}; // next line to process; <= limit_
|
|
|
|
const char *directiveSentinel_{nullptr}; // current compiler directive
|
2018-03-24 06:14:52 +08:00
|
|
|
|
|
|
|
// These data members are state for processing the source line containing
// "at_", which extends up to the newline character before "nextLine_".
|
2020-03-29 12:00:16 +08:00
|
|
|
const char *at_{nullptr}; // next character to process; < nextLine_
|
|
|
|
int column_{1}; // card image column position of next character
|
2018-02-10 06:04:11 +08:00
|
|
|
bool tabInCurrentLine_{false};
|
2020-08-08 07:44:08 +08:00
|
|
|
bool slashInCurrentStatement_{false};
|
|
|
|
bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith
|
2018-01-31 03:54:47 +08:00
|
|
|
bool inCharLiteral_{false};
|
|
|
|
bool inPreprocessorDirective_{false};
|
2018-03-24 06:14:52 +08:00
|
|
|
|
2018-05-11 04:48:55 +08:00
|
|
|
// In some edge cases of compiler directive continuation lines, it
|
|
|
|
// is necessary to treat the line break as a space character by
|
|
|
|
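// setting this flag.  NOTE(review): this flag was documented as being
// cleared by EmitChar(), but the inline EmitChar() in this class does not
// touch it -- confirm where it is actually reset.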
|
|
|
|
bool insertASpace_{false};
|
|
|
|
|
2019-03-15 03:19:26 +08:00
|
|
|
// When a free form continuation marker (&) appears at the end of a line
|
|
|
|
// before an INCLUDE or #include, we delete it and omit the newline, so
|
|
|
|
// that the first line of the included file is truly a continuation of
|
|
|
|
// the line before. Also used when the & appears at the end of the last
|
|
|
|
// line in an include file.
|
|
|
|
bool omitNewline_{false};
|
2019-03-15 04:53:35 +08:00
|
|
|
bool skipLeadingAmpersand_{false};
|
2019-03-15 03:19:26 +08:00
|
|
|
|
2018-03-24 06:14:52 +08:00
|
|
|
const Provenance spaceProvenance_{
|
2020-09-01 03:22:24 +08:00
|
|
|
allSources_.CompilerInsertionProvenance(' ')};
|
2018-03-24 06:14:52 +08:00
|
|
|
const Provenance backslashProvenance_{
|
2020-09-01 03:22:24 +08:00
|
|
|
allSources_.CompilerInsertionProvenance('\\')};
|
2018-03-21 01:59:07 +08:00
|
|
|
|
|
|
|
// To avoid probing the set of active compiler directive sentinel strings
|
|
|
|
// on every comment line, they're checked first with a cheap Bloom filter.
|
|
|
|
// Moduli used with the Bloom filter below.  Declared constexpr (and thus
// implicitly inline in C++17) so that they are full definitions and can
// be ODR-used, e.g. bound to a const reference, without requiring a
// separate out-of-class definition.
static constexpr int prime1{1019};
static constexpr int prime2{1021};
|
2020-03-29 12:00:16 +08:00
|
|
|
std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
|
2018-03-21 01:59:07 +08:00
|
|
|
std::unordered_set<std::string> compilerDirectiveSentinels_;
|
2018-01-31 03:54:47 +08:00
|
|
|
};
|
2020-03-29 12:00:16 +08:00
|
|
|
} // namespace Fortran::parser
|
|
|
|
#endif // FORTRAN_PARSER_PRESCAN_H_
|