forked from OSchip/llvm-project
[flang] Character set and encoding changes
Original-commit: flang-compiler/f18@a550cbd4c0 Reviewed-on: https://github.com/flang-compiler/f18/pull/496 Tree-same-pre-rewrite: false
This commit is contained in:
parent
e7a21f763a
commit
15af2b2f06
|
@ -56,7 +56,8 @@ std::ostream &ConstantBase<RESULT, VALUE>::AsFortran(std::ostream &o) const {
|
|||
Result::category == TypeCategory::Complex) {
|
||||
value.AsFortran(o, Result::kind);
|
||||
} else if constexpr (Result::category == TypeCategory::Character) {
|
||||
o << Result::kind << '_' << parser::QuoteCharacterLiteral(value);
|
||||
o << Result::kind << '_'
|
||||
<< parser::QuoteCharacterLiteral(value, true, false);
|
||||
} else if constexpr (Result::category == TypeCategory::Logical) {
|
||||
if (value.IsTrue()) {
|
||||
o << ".true.";
|
||||
|
@ -92,7 +93,9 @@ std::ostream &Constant<Type<TypeCategory::Character, KIND>>::AsFortran(
|
|||
} else if (Rank() == 0) {
|
||||
o << Result::kind << '_';
|
||||
}
|
||||
o << parser::QuoteCharacterLiteral(value);
|
||||
o << parser::QuoteCharacterLiteral(value, true /* double quotes */,
|
||||
false /* avoid backslash escapes */,
|
||||
parser::Encoding::UTF_8 /* module files are UTF-8 */);
|
||||
}
|
||||
if (Rank() > 0) {
|
||||
o << ']';
|
||||
|
|
|
@ -78,10 +78,6 @@ static std::optional<int> (*CharacterCounter(Encoding encoding))(const char *) {
|
|||
}
|
||||
}
|
||||
|
||||
std::optional<int> CharacterBytes(const char *p, Encoding encoding) {
|
||||
return CharacterCounter(encoding)(p);
|
||||
}
|
||||
|
||||
std::optional<int> CountCharacters(
|
||||
const char *p, std::size_t bytes, Encoding encoding) {
|
||||
std::size_t chars{0};
|
||||
|
@ -104,7 +100,7 @@ std::optional<int> CountCharacters(
|
|||
|
||||
template<typename STRING>
|
||||
std::string QuoteCharacterLiteralHelper(const STRING &str,
|
||||
bool doubleDoubleQuotes, bool doubleBackslash, Encoding encoding) {
|
||||
bool doubleDoubleQuotes, bool backslashEscapes, Encoding encoding) {
|
||||
std::string result{'"'};
|
||||
const auto emit{[&](char ch) { result += ch; }};
|
||||
for (auto ch : str) {
|
||||
|
@ -113,11 +109,11 @@ std::string QuoteCharacterLiteralHelper(const STRING &str,
|
|||
// char may be signed depending on host.
|
||||
char32_t ch32{static_cast<unsigned char>(ch)};
|
||||
EmitQuotedChar(
|
||||
ch32, emit, emit, doubleDoubleQuotes, doubleBackslash, encoding);
|
||||
ch32, emit, emit, doubleDoubleQuotes, backslashEscapes, encoding);
|
||||
} else {
|
||||
char32_t ch32{ch};
|
||||
EmitQuotedChar(
|
||||
ch32, emit, emit, doubleDoubleQuotes, doubleBackslash, encoding);
|
||||
ch32, emit, emit, doubleDoubleQuotes, backslashEscapes, encoding);
|
||||
}
|
||||
}
|
||||
result += '"';
|
||||
|
@ -125,24 +121,24 @@ std::string QuoteCharacterLiteralHelper(const STRING &str,
|
|||
}
|
||||
|
||||
std::string QuoteCharacterLiteral(const std::string &str,
|
||||
bool doubleDoubleQuotes, bool doubleBackslash, Encoding encoding) {
|
||||
bool doubleDoubleQuotes, bool backslashEscapes, Encoding encoding) {
|
||||
return QuoteCharacterLiteralHelper(
|
||||
str, doubleDoubleQuotes, doubleBackslash, encoding);
|
||||
str, doubleDoubleQuotes, backslashEscapes, encoding);
|
||||
}
|
||||
|
||||
std::string QuoteCharacterLiteral(const std::u16string &str,
|
||||
bool doubleDoubleQuotes, bool doubleBackslash, Encoding encoding) {
|
||||
bool doubleDoubleQuotes, bool backslashEscapes, Encoding encoding) {
|
||||
return QuoteCharacterLiteralHelper(
|
||||
str, doubleDoubleQuotes, doubleBackslash, encoding);
|
||||
str, doubleDoubleQuotes, backslashEscapes, encoding);
|
||||
}
|
||||
|
||||
std::string QuoteCharacterLiteral(const std::u32string &str,
|
||||
bool doubleDoubleQuotes, bool doubleBackslash, Encoding encoding) {
|
||||
bool doubleDoubleQuotes, bool backslashEscapes, Encoding encoding) {
|
||||
return QuoteCharacterLiteralHelper(
|
||||
str, doubleDoubleQuotes, doubleBackslash, encoding);
|
||||
str, doubleDoubleQuotes, backslashEscapes, encoding);
|
||||
}
|
||||
|
||||
EncodedCharacter EncodeLATIN_1(char codepoint) {
|
||||
EncodedCharacter EncodeLATIN_1(char32_t codepoint) {
|
||||
CHECK(codepoint <= 0xff);
|
||||
EncodedCharacter result;
|
||||
result.buffer[0] = codepoint;
|
||||
|
@ -178,7 +174,7 @@ EncodedCharacter EncodeUTF_8(char32_t codepoint) {
|
|||
return result;
|
||||
}
|
||||
|
||||
EncodedCharacter EncodeEUC_JP(char16_t codepoint) {
|
||||
EncodedCharacter EncodeEUC_JP(char32_t codepoint) {
|
||||
// Assume JIS X 0208 (TODO: others)
|
||||
CHECK(codepoint <= 0x6e6e);
|
||||
EncodedCharacter result;
|
||||
|
@ -205,64 +201,111 @@ EncodedCharacter EncodeCharacter(Encoding encoding, char32_t codepoint) {
|
|||
// Decodes the single UTF-8 sequence that begins at cp.  At most `bytes`
// bytes are examined.  On success the result holds the code point and the
// number of bytes consumed (1 to 4).  A default-constructed DecodedCharacter
// is returned when the input is empty, truncated, or not well-formed:
//  - every trailing byte must individually have the form 10xxxxxx
//    (OR-ing the trailing bytes together before masking lets a byte such
//    as 0x3f slip through when paired with a genuine continuation byte);
//  - overlong forms are rejected by comparing the decoded value with the
//    smallest code point that needs the sequence length, which keeps the
//    valid 0xe0 and 0xf0 lead bytes usable while still refusing 0xc0/0xc1.
DecodedCharacter DecodeUTF_8Character(const char *cp, std::size_t bytes) {
  if (bytes == 0) {
    return {};  // nothing to decode; don't read *cp
  }
  auto p{reinterpret_cast<const std::uint8_t *>(cp)};
  char32_t ch{*p};
  if (ch <= 0x7f) {
    return {ch, 1};
  }
  int length{0};
  char32_t smallest{0};  // least code point that requires `length` bytes
  if ((ch & 0xe0) == 0xc0) {
    length = 2;
    smallest = 0x80;
    ch &= 0x1f;
  } else if ((ch & 0xf0) == 0xe0) {
    length = 3;
    smallest = 0x800;
    ch &= 0xf;
  } else if ((ch & 0xf8) == 0xf0) {
    length = 4;
    smallest = 0x10000;
    ch &= 7;
  } else {
    return {};  // stray continuation byte or invalid lead byte
  }
  if (bytes < static_cast<std::size_t>(length)) {
    return {};  // truncated sequence
  }
  for (int j{1}; j < length; ++j) {
    if ((p[j] & 0xc0) != 0x80) {
      return {};  // not a continuation byte
    }
    ch = (ch << 6) | (p[j] & 0x3f);
  }
  if (ch < smallest) {
    return {};  // overlong encoding
  }
  return {ch, length};
}
|
||||
|
||||
// Decodes the single EUC-JP character that begins at cp.  At most `bytes`
// bytes are examined.  Recognized forms:
//   00..7f              one byte, returned unchanged
//   a1..fe a1..fe       JIS X 0208, two bytes
//   8e a1..df           JIS X 0201, two bytes
//   8f a1..fe a1..fe    JIS X 0212, three bytes
// For the two-byte-payload forms the result packs the low seven bits of
// each payload byte as (first << 8) | second.  A default-constructed
// DecodedCharacter is returned for empty, truncated, or invalid input.
DecodedCharacter DecodeEUC_JPCharacter(const char *cp, std::size_t bytes) {
  if (bytes == 0) {
    return {};  // nothing to decode; don't read *cp
  }
  auto p{reinterpret_cast<const std::uint8_t *>(cp)};
  char32_t ch{*p};
  if (ch <= 0x7f) {
    return {ch, 1};
  } else if (ch >= 0xa1 && ch <= 0xfe && bytes >= 2 && p[1] >= 0xa1 &&
      p[1] <= 0xfe) {
    ch = ((ch & 0x7f) << 8) | (p[1] & 0x7f);  // JIS X 0208
    return {ch, 2};
  } else if (ch == 0x8e && bytes >= 2 && p[1] >= 0xa1 && p[1] <= 0xdf) {
    return {p[1], 2};  // JIS X 0201
  } else if (ch == 0x8f && bytes >= 3 && p[1] >= 0xa1 && p[1] <= 0xfe &&
      p[2] >= 0xa1 && p[2] <= 0xfe) {
    // JIS X 0212: the payload is the two bytes after the 0x8f prefix, so
    // the low half must come from p[2], not from p[1] a second time.
    ch = ((p[1] & 0x7f) << 8) | (p[2] & 0x7f);
    return {ch, 3};
  } else {
    return {};  // not valid EUC_JP
  }
}
|
||||
|
||||
// A LATIN-1 character is exactly one byte whose unsigned value is the code
// point.  The caller must guarantee that *cp is readable.
DecodedCharacter DecodeLATIN1Character(const char *cp) {
  const std::uint8_t byte{static_cast<std::uint8_t>(*cp)};
  return {byte, 1};
}
|
||||
|
||||
DecodedCharacter DecodeCharacter(
|
||||
// Decodes one unit of a character literal in which backslash escapes are
// enabled.  At most `bytes` bytes starting at cp are examined.  Recognized
// escapes are the single-character ones known to BackslashEscapeValue(),
// an octal escape of up to three digits (two if the first digit exceeds
// '3', so that the value fits in a byte), and \x or \X followed by exactly
// two hexadecimal digits.  Anything else, including a backslash that does
// not begin a recognized escape, yields the single byte at cp unchanged.
// The result holds the decoded byte value and the count of source bytes
// consumed; empty input yields a default-constructed result.
static DecodedCharacter DecodeEscapedCharacter(
    const char *cp, std::size_t bytes) {
  if (bytes == 0) {
    return {};  // nothing to decode; don't read cp[0]
  }
  if (bytes > 1 && cp[0] == '\\') {
    if (std::optional<char> escChar{BackslashEscapeValue(cp[1])}) {
      return {static_cast<char32_t>(*escChar), 2};
    }
    if (IsOctalDigit(cp[1])) {
      std::size_t maxDigits{static_cast<std::size_t>(cp[1] > '3' ? 2 : 3)};
      // The escape spans the backslash plus at most maxDigits digits, and
      // must never extend past the available input: take the smaller of
      // the two limits (std::max here would scan beyond both).
      std::size_t maxLen{std::min(maxDigits + 1, bytes)};
      char32_t code{static_cast<char32_t>(cp[1] - '0')};
      std::size_t len{2};  // so far
      for (; len < maxLen && IsOctalDigit(cp[len]); ++len) {
        code = 8 * code + DecimalDigitValue(cp[len]);
      }
      return {code, static_cast<int>(len)};
    } else if (bytes >= 4 && ToLowerCaseLetter(cp[1]) == 'x' &&
        IsHexadecimalDigit(cp[2]) && IsHexadecimalDigit(cp[3])) {
      return {static_cast<char32_t>(16 * HexadecimalDigitValue(cp[2]) +
                  HexadecimalDigitValue(cp[3])),
          4};
    }
  }
  // Go through unsigned char so that bytes >= 0x80 are not sign-extended
  // on hosts where plain char is signed.
  return {static_cast<char32_t>(static_cast<unsigned char>(cp[0])), 1};
}
|
||||
|
||||
// Decodes one character of the given encoding from input in which any byte
// may be spelled as a backslash escape.  Up to four units are first reduced
// to raw bytes with DecodeEscapedCharacter(); those raw bytes are then
// decoded as one character with escapes disabled.  The byte count of the
// result is mapped back from raw bytes to the number of source bytes that
// produced them.
static DecodedCharacter DecodeEscapedCharacters(
    Encoding encoding, const char *cp, std::size_t bytes) {
  constexpr std::size_t maxUnits{4};
  char raw[maxUnits];
  int consumed[maxUnits];  // consumed[j]: source bytes used through raw[j]
  std::size_t offset{0};
  std::size_t units{0};
  while (units < maxUnits && offset < bytes) {
    DecodedCharacter unit{DecodeEscapedCharacter(cp + offset, bytes - offset)};
    raw[units] = unit.unicode;
    offset += unit.bytes;
    consumed[units] = offset;
    ++units;
  }
  DecodedCharacter result{DecodeCharacter(encoding, raw, units, false)};
  if (result.bytes > 0) {
    result.bytes = consumed[result.bytes - 1];
  }
  return result;
}
|
||||
|
||||
// Decodes the character at cp in the given encoding, examining at most
// `bytes` bytes.  When backslashEscapes is set and the input begins with a
// backslash, the escape-aware decoder is used instead of the raw one.
DecodedCharacter DecodeCharacter(Encoding encoding, const char *cp,
    std::size_t bytes, bool backslashEscapes) {
  const bool startsWithEscape{backslashEscapes && bytes >= 1 && *cp == '\\'};
  if (startsWithEscape) {
    return DecodeEscapedCharacters(encoding, cp, bytes);
  }
  switch (encoding) {
  case Encoding::LATIN_1:
    // The LATIN-1 decoder takes no length, so check for empty input here.
    if (bytes < 1) {
      return {};
    }
    return DecodeLATIN1Character(cp);
  case Encoding::UTF_8: return DecodeUTF_8Character(cp, bytes);
  case Encoding::EUC_JP: return DecodeEUC_JPCharacter(cp, bytes);
  default: CRASH_NO_CASE;
  }
}
|
||||
|
||||
|
|
|
@ -109,32 +109,32 @@ inline constexpr char HexadecimalDigitValue(char ch) {
|
|||
|
||||
// Maps the character that follows a backslash in an escape sequence to the
// character it denotes: b, f, n, r, t, and v give the corresponding control
// characters, while a double quote, an apostrophe, and a backslash stand
// for themselves.  Any other character is not a single-character escape
// and yields std::nullopt.  There is deliberately no entry for 'a'
// (pgf90 has no \a).
inline constexpr std::optional<char> BackslashEscapeValue(char ch) {
  constexpr int count{6};
  constexpr char letters[count]{'b', 'f', 'n', 'r', 't', 'v'};
  constexpr char controls[count]{'\b', '\f', '\n', '\r', '\t', '\v'};
  for (int j{0}; j < count; ++j) {
    if (ch == letters[j]) {
      return controls[j];
    }
  }
  if (ch == '"' || ch == '\'' || ch == '\\') {
    return ch;
  }
  return std::nullopt;
}
|
||||
|
||||
// Inverse of BackslashEscapeValue(): maps a character to the character that
// follows the backslash in its escape sequence.  The control characters
// \b, \f, \n, \r, \t, and \v give b, f, n, r, t, and v; a double quote, an
// apostrophe, and a backslash stand for themselves.  Any other character
// has no single-character escape and yields std::nullopt.  There is
// deliberately no entry for '\a' (pgf90 has no \a).
inline constexpr std::optional<char> BackslashEscapeChar(char ch) {
  constexpr int count{6};
  constexpr char controls[count]{'\b', '\f', '\n', '\r', '\t', '\v'};
  constexpr char letters[count]{'b', 'f', 'n', 'r', 't', 'v'};
  for (int j{0}; j < count; ++j) {
    if (ch == controls[j]) {
      return letters[j];
    }
  }
  if (ch == '"' || ch == '\'' || ch == '\\') {
    return ch;
  }
  return std::nullopt;
}
|
||||
|
@ -144,56 +144,63 @@ struct EncodedCharacter {
|
|||
int bytes{0};
|
||||
};
|
||||
|
||||
EncodedCharacter EncodeLATIN_1(char);
|
||||
EncodedCharacter EncodeLATIN_1(char32_t);
|
||||
EncodedCharacter EncodeUTF_8(char32_t);
|
||||
EncodedCharacter EncodeEUC_JP(char16_t);
|
||||
EncodedCharacter EncodeEUC_JP(char32_t);
|
||||
EncodedCharacter EncodeCharacter(Encoding, char32_t);
|
||||
|
||||
// EmitQuotedChar drives callbacks "emit" and "insert" to output the
|
||||
// bytes of an encoding for a codepoint.
|
||||
template<typename NORMAL, typename INSERTED>
|
||||
void EmitQuotedChar(char32_t ch, const NORMAL &emit, const INSERTED &insert,
|
||||
bool doubleDoubleQuotes = true, bool doubleBackslash = true,
|
||||
bool doubleDoubleQuotes = true, bool backslashEscapes = true,
|
||||
Encoding encoding = Encoding::UTF_8) {
|
||||
auto emitOneChar{[&](std::uint8_t ch) {
|
||||
if (ch < ' ' || (backslashEscapes && (ch == '\\' || ch >= 0x7f))) {
|
||||
insert('\\');
|
||||
if (std::optional<char> escape{BackslashEscapeChar(ch)}) {
|
||||
emit(*escape);
|
||||
} else {
|
||||
// octal escape sequence
|
||||
if (ch > 077) {
|
||||
insert('0' + (ch >> 6));
|
||||
}
|
||||
if (ch > 07) {
|
||||
insert('0' + ((ch >> 3) & 7));
|
||||
}
|
||||
insert('0' + (ch & 7));
|
||||
}
|
||||
} else {
|
||||
emit(ch);
|
||||
}
|
||||
}};
|
||||
if (ch == '"') {
|
||||
if (doubleDoubleQuotes) {
|
||||
insert('"');
|
||||
}
|
||||
emit('"');
|
||||
} else if (ch == '\\') {
|
||||
if (doubleBackslash) {
|
||||
insert('\\');
|
||||
}
|
||||
emit('\\');
|
||||
} else if (ch < ' ' || (encoding == Encoding::LATIN_1 && ch >= 0x7f)) {
|
||||
insert('\\');
|
||||
if (std::optional<char> escape{BackslashEscapeChar(ch)}) {
|
||||
emit(*escape);
|
||||
} else {
|
||||
// octal escape sequence
|
||||
insert('0' + ((ch >> 6) & 3));
|
||||
insert('0' + ((ch >> 3) & 7));
|
||||
insert('0' + (ch & 7));
|
||||
}
|
||||
} else if (ch <= 0x7f) {
|
||||
emitOneChar(ch);
|
||||
} else {
|
||||
EncodedCharacter encoded{EncodeCharacter(encoding, ch)};
|
||||
for (int j{0}; j < encoded.bytes; ++j) {
|
||||
emit(encoded.buffer[j]);
|
||||
emitOneChar(encoded.buffer[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string QuoteCharacterLiteral(const std::string &,
|
||||
bool doubleDoubleQuotes = true, bool doubleBackslash = true,
|
||||
bool doubleDoubleQuotes = true, bool backslashEscapes = true,
|
||||
Encoding = Encoding::LATIN_1);
|
||||
std::string QuoteCharacterLiteral(const std::u16string &,
|
||||
bool doubleDoubleQuotes = true, bool doubleBackslash = true,
|
||||
bool doubleDoubleQuotes = true, bool backslashEscapes = true,
|
||||
Encoding = Encoding::EUC_JP);
|
||||
std::string QuoteCharacterLiteral(const std::u32string &,
|
||||
bool doubleDoubleQuotes = true, bool doubleBackslash = true,
|
||||
bool doubleDoubleQuotes = true, bool backslashEscapes = true,
|
||||
Encoding = Encoding::UTF_8);
|
||||
|
||||
std::optional<int> UTF_8CharacterBytes(const char *);
|
||||
std::optional<int> EUC_JPCharacterBytes(const char *);
|
||||
std::optional<int> CharacterBytes(const char *, Encoding);
|
||||
std::optional<int> CountCharacters(const char *, std::size_t bytes, Encoding);
|
||||
|
||||
struct DecodedCharacter {
|
||||
|
@ -204,7 +211,8 @@ struct DecodedCharacter {
|
|||
DecodedCharacter DecodeUTF_8Character(const char *, std::size_t);
|
||||
DecodedCharacter DecodeEUC_JPCharacter(const char *, std::size_t);
|
||||
DecodedCharacter DecodeLATIN1Character(const char *);
|
||||
DecodedCharacter DecodeCharacter(Encoding, const char *, std::size_t);
|
||||
DecodedCharacter DecodeCharacter(
|
||||
Encoding, const char *, std::size_t, bool backslashEscapes = false);
|
||||
|
||||
std::u32string DecodeUTF_8(const std::string &);
|
||||
std::u16string DecodeEUC_JP(const std::string &);
|
||||
|
|
|
@ -44,7 +44,7 @@ public:
|
|||
ParseState(const ParseState &that)
|
||||
: p_{that.p_}, limit_{that.limit_}, context_{that.context_},
|
||||
userState_{that.userState_}, inFixedForm_{that.inFixedForm_},
|
||||
encoding_{that.encoding_}, anyErrorRecovery_{that.anyErrorRecovery_},
|
||||
anyErrorRecovery_{that.anyErrorRecovery_},
|
||||
anyConformanceViolation_{that.anyConformanceViolation_},
|
||||
deferMessages_{that.deferMessages_},
|
||||
anyDeferredMessages_{that.anyDeferredMessages_},
|
||||
|
@ -52,7 +52,7 @@ public:
|
|||
ParseState(ParseState &&that)
|
||||
: p_{that.p_}, limit_{that.limit_}, messages_{std::move(that.messages_)},
|
||||
context_{std::move(that.context_)}, userState_{that.userState_},
|
||||
inFixedForm_{that.inFixedForm_}, encoding_{that.encoding_},
|
||||
inFixedForm_{that.inFixedForm_},
|
||||
anyErrorRecovery_{that.anyErrorRecovery_},
|
||||
anyConformanceViolation_{that.anyConformanceViolation_},
|
||||
deferMessages_{that.deferMessages_},
|
||||
|
@ -61,7 +61,6 @@ public:
|
|||
ParseState &operator=(const ParseState &that) {
|
||||
p_ = that.p_, limit_ = that.limit_, context_ = that.context_;
|
||||
userState_ = that.userState_, inFixedForm_ = that.inFixedForm_;
|
||||
encoding_ = that.encoding_;
|
||||
anyErrorRecovery_ = that.anyErrorRecovery_;
|
||||
anyConformanceViolation_ = that.anyConformanceViolation_;
|
||||
deferMessages_ = that.deferMessages_;
|
||||
|
@ -73,7 +72,6 @@ public:
|
|||
p_ = that.p_, limit_ = that.limit_, messages_ = std::move(that.messages_);
|
||||
context_ = std::move(that.context_);
|
||||
userState_ = that.userState_, inFixedForm_ = that.inFixedForm_;
|
||||
encoding_ = that.encoding_;
|
||||
anyErrorRecovery_ = that.anyErrorRecovery_;
|
||||
anyConformanceViolation_ = that.anyConformanceViolation_;
|
||||
deferMessages_ = that.deferMessages_;
|
||||
|
@ -106,12 +104,6 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
Encoding encoding() const { return encoding_; }
|
||||
ParseState &set_encoding(Encoding encoding) {
|
||||
encoding_ = encoding;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool deferMessages() const { return deferMessages_; }
|
||||
ParseState &set_deferMessages(bool yes = true) {
|
||||
deferMessages_ = yes;
|
||||
|
@ -231,7 +223,6 @@ private:
|
|||
UserState *userState_{nullptr};
|
||||
|
||||
bool inFixedForm_{false};
|
||||
Encoding encoding_{Encoding::UTF_8};
|
||||
bool anyErrorRecovery_{false};
|
||||
bool anyConformanceViolation_{false};
|
||||
bool deferMessages_{false};
|
||||
|
|
|
@ -69,7 +69,6 @@ void Parsing::Prescan(const std::string &path, Options options) {
|
|||
Prescanner prescanner{messages_, cooked_, preprocessor, options.features};
|
||||
prescanner.set_fixedForm(options.isFixedForm)
|
||||
.set_fixedFormColumnLimit(options.fixedFormColumns)
|
||||
.set_encoding(options.encoding)
|
||||
.AddCompilerDirectiveSentinel("dir$");
|
||||
if (options.features.IsEnabled(LanguageFeature::OpenMP)) {
|
||||
prescanner.AddCompilerDirectiveSentinel("$omp");
|
||||
|
@ -102,9 +101,7 @@ void Parsing::Parse(std::ostream *out) {
|
|||
.set_instrumentedParse(options_.instrumentedParse)
|
||||
.set_log(&log_);
|
||||
ParseState parseState{cooked_};
|
||||
parseState.set_inFixedForm(options_.isFixedForm)
|
||||
.set_encoding(options_.encoding)
|
||||
.set_userState(&userState);
|
||||
parseState.set_inFixedForm(options_.isFixedForm).set_userState(&userState);
|
||||
parseTree_ = program.Parse(parseState);
|
||||
CHECK(
|
||||
!parseState.anyErrorRecovery() || parseState.messages().AnyFatalError());
|
||||
|
|
|
@ -37,7 +37,6 @@ struct Options {
|
|||
bool isFixedForm{false};
|
||||
int fixedFormColumns{72};
|
||||
LanguageFeatureControl features;
|
||||
Encoding encoding{Encoding::UTF_8};
|
||||
std::vector<std::string> searchDirectories;
|
||||
std::vector<Predefinition> predefinitions;
|
||||
bool instrumentedParse{false};
|
||||
|
|
|
@ -588,7 +588,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
|
|||
} else if (included->bytes() > 0) {
|
||||
ProvenanceRange fileRange{
|
||||
allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
|
||||
Prescanner{*prescanner}.Prescan(fileRange);
|
||||
Prescanner{*prescanner}
|
||||
.set_encoding(included->encoding())
|
||||
.Prescan(fileRange);
|
||||
}
|
||||
} else {
|
||||
prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
|
||||
|
|
|
@ -31,8 +31,8 @@ static constexpr int maxPrescannerNesting{100};
|
|||
|
||||
// Builds a prescanner over the given message sink, cooked source, and
// preprocessor.  The initial source encoding is the one recorded by the
// cooked source's AllSources.
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
    Preprocessor &preprocessor, LanguageFeatureControl lfc)
  : messages_{messages},
    cooked_{cooked},
    preprocessor_{preprocessor},
    features_{lfc},
    encoding_{cooked.allSources().encoding()} {}
|
||||
|
||||
Prescanner::Prescanner(const Prescanner &that)
|
||||
: messages_{that.messages_}, cooked_{that.cooked_},
|
||||
|
@ -295,6 +295,11 @@ bool Prescanner::MustSkipToEndOfLine() const {
|
|||
void Prescanner::NextChar() {
|
||||
CHECK(*at_ != '\n');
|
||||
++at_, ++column_;
|
||||
while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
|
||||
// UTF-8 byte order mark - treat this file as UTF-8
|
||||
at_ += 3;
|
||||
encoding_ = Encoding::UTF_8;
|
||||
}
|
||||
if (inPreprocessorDirective_) {
|
||||
SkipCComments();
|
||||
} else {
|
||||
|
@ -477,10 +482,18 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
|
|||
}
|
||||
preventHollerith_ = false;
|
||||
} else if (IsLegalInIdentifier(*at_)) {
|
||||
while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
|
||||
}
|
||||
// Look for NC'...' prefix - legacy PGI "Kanji" NCHARACTER literal
|
||||
char buffer[2];
|
||||
int idChars{0};
|
||||
do {
|
||||
if (idChars < static_cast<int>(sizeof buffer)) {
|
||||
buffer[idChars] = ToLowerCaseLetter(*at_);
|
||||
}
|
||||
++idChars;
|
||||
} while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_)));
|
||||
if (*at_ == '\'' || *at_ == '"') {
|
||||
QuotedCharacterLiteral(tokens, start);
|
||||
bool isKanji{idChars == 2 && buffer[0] == 'n' && buffer[1] == 'c'};
|
||||
QuotedCharacterLiteral(tokens, start, isKanji);
|
||||
preventHollerith_ = false;
|
||||
} else {
|
||||
// Subtle: Don't misrecognize labeled DO statement label as Hollerith
|
||||
|
@ -522,7 +535,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
|
|||
}
|
||||
|
||||
bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
|
||||
char ed = ToLowerCaseLetter(*at_);
|
||||
char ed{ToLowerCaseLetter(*at_)};
|
||||
if (ed != 'e' && ed != 'd') {
|
||||
return false;
|
||||
}
|
||||
|
@ -541,7 +554,7 @@ bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
|
|||
}
|
||||
|
||||
void Prescanner::QuotedCharacterLiteral(
|
||||
TokenSequence &tokens, const char *start) {
|
||||
TokenSequence &tokens, const char *start, bool isKanji) {
|
||||
char quote{*at_};
|
||||
const char *end{at_ + 1};
|
||||
inCharLiteral_ = true;
|
||||
|
@ -549,9 +562,14 @@ void Prescanner::QuotedCharacterLiteral(
|
|||
const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
|
||||
bool escape{false};
|
||||
bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
|
||||
Encoding encoding{encoding_};
|
||||
if (isKanji) {
|
||||
// NC'...' - the contents are EUC_JP even if the context is not
|
||||
encoding = Encoding::EUC_JP;
|
||||
}
|
||||
while (true) {
|
||||
DecodedCharacter decoded{DecodeCharacter(
|
||||
encoding_, at_, static_cast<std::size_t>(limit_ - at_))};
|
||||
encoding, at_, static_cast<std::size_t>(limit_ - at_), escapesEnabled)};
|
||||
if (decoded.bytes <= 0) {
|
||||
Say(GetProvenanceRange(start, end),
|
||||
"Bad character in character literal"_err_en_US);
|
||||
|
@ -559,7 +577,9 @@ void Prescanner::QuotedCharacterLiteral(
|
|||
}
|
||||
char32_t ch{decoded.unicode};
|
||||
escape = !escape && ch == '\\' && escapesEnabled;
|
||||
EmitQuotedChar(ch, emit, insert, false, !escapesEnabled);
|
||||
EmitQuotedChar(ch, emit, insert, false /* don't double quotes */,
|
||||
true /* use backslash escapes */,
|
||||
Encoding::UTF_8 /* cooked char stream is UTF-8 only */);
|
||||
while (PadOutCharacterLiteral(tokens)) {
|
||||
}
|
||||
if (*at_ == '\n') {
|
||||
|
@ -613,7 +633,7 @@ void Prescanner::Hollerith(
|
|||
for (int j{0}; j < utf8.bytes; ++j) {
|
||||
EmitChar(tokens, utf8.buffer[j]);
|
||||
}
|
||||
at_ += decoded.bytes;
|
||||
at_ += decoded.bytes - 1;
|
||||
} else {
|
||||
Say(GetProvenanceRange(start, at_),
|
||||
"Bad character in Hollerith literal"_err_en_US);
|
||||
|
@ -746,7 +766,7 @@ void Prescanner::FortranInclude(const char *firstQuote) {
|
|||
provenance, static_cast<std::size_t>(p - nextLine_)};
|
||||
ProvenanceRange fileRange{
|
||||
allSources.AddIncludedFile(*included, includeLineRange)};
|
||||
Prescanner{*this}.Prescan(fileRange);
|
||||
Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -158,7 +158,8 @@ private:
|
|||
const char *SkipCComment(const char *) const;
|
||||
bool NextToken(TokenSequence &);
|
||||
bool ExponentAndKind(TokenSequence &);
|
||||
void QuotedCharacterLiteral(TokenSequence &, const char *start);
|
||||
void QuotedCharacterLiteral(
|
||||
TokenSequence &, const char *start, bool isKanji = false);
|
||||
void Hollerith(TokenSequence &, int count, const char *start);
|
||||
bool PadOutCharacterLiteral(TokenSequence &);
|
||||
bool SkipCommentLine(bool afterAmpersand);
|
||||
|
|
|
@ -108,7 +108,7 @@ std::string AllSources::PopSearchPathDirectory() {
|
|||
}
|
||||
|
||||
const SourceFile *AllSources::Open(std::string path, std::stringstream *error) {
|
||||
std::unique_ptr<SourceFile> source{std::make_unique<SourceFile>()};
|
||||
std::unique_ptr<SourceFile> source{std::make_unique<SourceFile>(encoding_)};
|
||||
if (source->Open(LocateSourceFile(path, searchPath_), error)) {
|
||||
return ownedSourceFiles_.emplace_back(std::move(source)).get();
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ const SourceFile *AllSources::Open(std::string path, std::stringstream *error) {
|
|||
}
|
||||
|
||||
const SourceFile *AllSources::ReadStandardInput(std::stringstream *error) {
|
||||
std::unique_ptr<SourceFile> source{std::make_unique<SourceFile>()};
|
||||
std::unique_ptr<SourceFile> source{std::make_unique<SourceFile>(encoding_)};
|
||||
if (source->ReadStandardInput(error)) {
|
||||
return ownedSourceFiles_.emplace_back(std::move(source)).get();
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "char-block.h"
|
||||
#include "char-buffer.h"
|
||||
#include "characters.h"
|
||||
#include "source.h"
|
||||
#include "../common/idioms.h"
|
||||
#include "../common/interval.h"
|
||||
|
@ -117,6 +118,11 @@ public:
|
|||
|
||||
std::size_t size() const { return range_.size(); }
|
||||
const char &operator[](Provenance) const;
|
||||
Encoding encoding() const { return encoding_; }
|
||||
AllSources &set_encoding(Encoding e) {
|
||||
encoding_ = e;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void PushSearchPathDirectory(std::string);
|
||||
std::string PopSearchPathDirectory();
|
||||
|
@ -181,6 +187,7 @@ private:
|
|||
std::map<char, Provenance> compilerInsertionProvenance_;
|
||||
std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_;
|
||||
std::vector<std::string> searchPath_;
|
||||
Encoding encoding_{Encoding::UTF_8};
|
||||
};
|
||||
|
||||
class CookedSource {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
@ -60,17 +60,18 @@ void SourceFile::RecordLineStarts() {
|
|||
lineStart_ = FindLineStarts(content_, bytes_);
|
||||
}
|
||||
|
||||
// Cut down the contiguous content of a source file to skip
|
||||
// things like byte order marks.
|
||||
// Check for a Unicode byte order mark (BOM).
|
||||
// Module files all have one; so can source files.
|
||||
void SourceFile::IdentifyPayload() {
|
||||
content_ = address_;
|
||||
bytes_ = size_;
|
||||
if (content_ != nullptr) {
|
||||
static constexpr int BOMBytes{3};
|
||||
static const char UTF8_BOM[]{"\xef\xbb\xbf"};
|
||||
if (bytes_ >= sizeof UTF8_BOM &&
|
||||
std::memcmp(content_, UTF8_BOM, sizeof UTF8_BOM) == 0) {
|
||||
content_ += sizeof UTF8_BOM;
|
||||
bytes_ -= sizeof UTF8_BOM;
|
||||
if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) {
|
||||
content_ += BOMBytes;
|
||||
bytes_ -= BOMBytes;
|
||||
encoding_ = Encoding::UTF_8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
@ -18,7 +18,9 @@
|
|||
// Source file content is lightly normalized when the file is read.
|
||||
// - Line ending markers are converted to single newline characters
|
||||
// - A newline character is added to the last line of the file if one is needed
|
||||
// - A Unicode byte order mark is recognized if present.
|
||||
|
||||
#include "characters.h"
|
||||
#include <cstddef>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
@ -33,12 +35,13 @@ std::string LocateSourceFile(
|
|||
|
||||
class SourceFile {
|
||||
public:
|
||||
SourceFile() {}
|
||||
explicit SourceFile(Encoding e) : encoding_{e} {}
|
||||
~SourceFile();
|
||||
std::string path() const { return path_; }
|
||||
const char *content() const { return content_; }
|
||||
std::size_t bytes() const { return bytes_; }
|
||||
std::size_t lines() const { return lineStart_.size(); }
|
||||
Encoding encoding() const { return encoding_; }
|
||||
|
||||
bool Open(std::string path, std::stringstream *error);
|
||||
bool ReadStandardInput(std::stringstream *error);
|
||||
|
@ -62,6 +65,7 @@ private:
|
|||
std::size_t bytes_{0};
|
||||
std::vector<std::size_t> lineStart_;
|
||||
std::string normalized_;
|
||||
Encoding encoding_{Encoding::UTF_8};
|
||||
};
|
||||
}
|
||||
#endif // FORTRAN_PARSER_SOURCE_H_
|
||||
|
|
|
@ -207,70 +207,41 @@ template<class PA> inline constexpr auto bracketed(const PA &p) {
|
|||
|
||||
// Quoted character literal constants.
|
||||
struct CharLiteralChar {
|
||||
struct Result {
|
||||
Result(char c, bool esc) : ch{c}, wasEscaped{esc} {}
|
||||
static Result Bare(char c) { return Result{c, false}; }
|
||||
static Result Escaped(char c) { return Result{c, true}; }
|
||||
char ch;
|
||||
bool wasEscaped;
|
||||
};
|
||||
using resultType = Result;
|
||||
static std::optional<Result> Parse(ParseState &state) {
|
||||
using resultType = char;
|
||||
static std::optional<char> Parse(ParseState &state) {
|
||||
auto at{state.GetLocation()};
|
||||
std::optional<const char *> och{nextCh.Parse(state)};
|
||||
if (!och.has_value()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
char ch{**och};
|
||||
if (ch == '\n') {
|
||||
state.Say(CharBlock{at, state.GetLocation()},
|
||||
"unclosed character constant"_err_en_US);
|
||||
return std::nullopt;
|
||||
}
|
||||
if (ch != '\\') {
|
||||
return Result::Bare(ch);
|
||||
}
|
||||
if (!(och = nextCh.Parse(state)).has_value()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
ch = **och;
|
||||
if (ch == '\n') {
|
||||
state.Say(CharBlock{at, state.GetLocation()},
|
||||
"unclosed character constant"_err_en_US);
|
||||
return std::nullopt;
|
||||
}
|
||||
if (std::optional<char> escChar{BackslashEscapeValue(ch)}) {
|
||||
return Result::Escaped(*escChar);
|
||||
}
|
||||
if (IsOctalDigit(ch)) {
|
||||
ch -= '0';
|
||||
for (int j = (ch > 3 ? 1 : 2); j-- > 0;) {
|
||||
static constexpr auto octalDigit{attempt("01234567"_ch)};
|
||||
och = octalDigit.Parse(state);
|
||||
if (och.has_value()) {
|
||||
ch = 8 * ch + **och - '0';
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (ch == 'x' || ch == 'X') {
|
||||
ch = 0;
|
||||
static constexpr auto hexDigit{"0123456789abcdefABCDEF"_ch};
|
||||
och = hexDigit.Parse(state);
|
||||
if (och.has_value()) {
|
||||
ch = HexadecimalDigitValue(**och);
|
||||
static constexpr auto hexDigit2{attempt("0123456789abcdefABCDEF"_ch)};
|
||||
och = hexDigit2.Parse(state);
|
||||
if (och.has_value()) {
|
||||
ch = 16 * ch + HexadecimalDigitValue(**och);
|
||||
}
|
||||
} else {
|
||||
if (std::optional<const char *> cp{nextCh.Parse(state)}) {
|
||||
if (**cp == '\n') {
|
||||
state.Say(CharBlock{at, state.GetLocation()},
|
||||
"Unclosed character constant"_err_en_US);
|
||||
return std::nullopt;
|
||||
}
|
||||
} else {
|
||||
state.Say(at, "bad escaped character"_en_US);
|
||||
if (**cp != '\\') {
|
||||
return **cp;
|
||||
}
|
||||
if (!(cp = nextCh.Parse(state)).has_value()) {
|
||||
state.Say(CharBlock{at, state.GetLocation()},
|
||||
"Unclosed character constant"_err_en_US);
|
||||
return std::nullopt;
|
||||
}
|
||||
if (std::optional<char> escChar{BackslashEscapeValue(**cp)}) {
|
||||
return escChar;
|
||||
}
|
||||
if (IsOctalDigit(**cp)) {
|
||||
int result{**cp - '0'};
|
||||
for (int j = (result > 3 ? 1 : 2); j-- > 0;) {
|
||||
static constexpr auto octalDigit{attempt("01234567"_ch)};
|
||||
if (std::optional<const char *> oct{octalDigit.Parse(state)}) {
|
||||
result = 8 * result + **oct - '0';
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
state.Say(at, "Bad escaped character"_err_en_US);
|
||||
}
|
||||
return {Result::Escaped(ch)};
|
||||
return std::nullopt;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -279,14 +250,14 @@ template<char quote> struct CharLiteral {
|
|||
static std::optional<std::string> Parse(ParseState &state) {
|
||||
std::string str;
|
||||
static constexpr auto nextch{attempt(CharLiteralChar{})};
|
||||
while (std::optional<CharLiteralChar::Result> ch{nextch.Parse(state)}) {
|
||||
if (ch->ch == quote && !ch->wasEscaped) {
|
||||
while (std::optional<char> ch{nextch.Parse(state)}) {
|
||||
if (*ch == quote) {
|
||||
static constexpr auto doubled{attempt(AnyOfChars{SetOfChars{quote}})};
|
||||
if (!doubled.Parse(state).has_value()) {
|
||||
return str;
|
||||
}
|
||||
}
|
||||
str += ch->ch;
|
||||
str += *ch;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
@ -544,7 +515,7 @@ struct HollerithLiteral {
|
|||
std::string content;
|
||||
for (auto j{*charCount}; j-- > 0;) {
|
||||
if (std::optional<int> chBytes{
|
||||
CharacterBytes(state.GetLocation(), state.encoding())}) {
|
||||
UTF_8CharacterBytes(state.GetLocation())}) {
|
||||
for (int bytes{*chBytes}; bytes > 0; --bytes) {
|
||||
if (std::optional<const char *> at{nextCh.Parse(state)}) {
|
||||
if (*chBytes == 1 && !isprint(**at)) {
|
||||
|
|
|
@ -31,7 +31,10 @@ namespace Fortran::semantics {
|
|||
using namespace parser::literals;
|
||||
|
||||
// The initial characters of a file that identify it as a .mod file.
|
||||
static constexpr auto magic{"!mod$ v1 sum:"};
|
||||
// The first three bytes are a Unicode byte order mark that ensures
|
||||
// that the module file is decoded as UTF-8 even if source files
|
||||
// are using another encoding.
|
||||
static constexpr auto magic{"\xef\xbb\xbf!mod$ v1 sum:"};
|
||||
|
||||
static const SourceName *GetSubmoduleParent(const parser::Program &);
|
||||
static std::string ModFilePath(const std::string &dir, const SourceName &,
|
||||
|
|
|
@ -15,9 +15,12 @@
|
|||
|
||||
! Test UTF-8 support in character literals
|
||||
! TODO: test EUC-JP
|
||||
! Note: Module files are encoded in UTF-8.
|
||||
|
||||
module m
|
||||
character(kind=4,len=:), parameter :: c4 = 4_"Hi! 你好!"
|
||||
! In CHARACTER(1) literals, codepoints > 0xff are serialized into UTF-8;
|
||||
! each of those bytes then gets encoded into UTF-8 for the module file.
|
||||
character(kind=1,len=:), parameter :: c1 = 1_"Hi! 你好!"
|
||||
character(kind=4,len=:), parameter :: c4a(:) = [4_"一", 4_"二", 4_"三", 4_"四", 4_"五"]
|
||||
integer, parameter :: lc4 = len(c4)
|
||||
|
@ -27,7 +30,7 @@ end module m
|
|||
!Expect: m.mod
|
||||
!module m
|
||||
!character(:,4),parameter::c4=4_"Hi! 你好!"
|
||||
!character(:,1),parameter::c1=1_"Hi! \344\275\240\345\245\275!"
|
||||
!character(:,1),parameter::c1=1_"Hi! ä½ å¥½!"
|
||||
!character(:,4),parameter::c4a(1_8:)=[CHARACTER(KIND=4,LEN=1)::"一","二","三","四","五"]
|
||||
!integer(4),parameter::lc4=7_4
|
||||
!integer(4),parameter::lc1=11_4
|
||||
|
|
|
@ -59,7 +59,8 @@ for src in "$@"; do
|
|||
echo FAIL
|
||||
exit 1
|
||||
fi
|
||||
sed '/^!mod\$/d' $temp/$mod > $actual
|
||||
# The first three bytes of the file are a UTF-8 BOM
|
||||
sed '/^.!mod\$/d' $temp/$mod > $actual
|
||||
sed '1,/^!Expect: '"$mod"'/d' $src | sed -e '/^$/,$d' -e 's/^! *//' > $expect
|
||||
if ! diff -U999999 $expect $actual > $diffs; then
|
||||
echo "Module file $mod differs from expected:"
|
||||
|
|
|
@ -446,7 +446,6 @@ int main(int argc, char *const argv[]) {
|
|||
}
|
||||
}
|
||||
}
|
||||
driver.encoding = options.encoding;
|
||||
|
||||
if (driver.warnOnNonstandardUsage) {
|
||||
options.features.WarnOnAllNonstandard();
|
||||
|
|
|
@ -452,7 +452,7 @@ int main(int argc, char *const argv[]) {
|
|||
driver.moduleFileSuffix = args.front();
|
||||
args.pop_front();
|
||||
} else if (arg == "-fno-utf-8") {
|
||||
options.encoding = Fortran::parser::Encoding::LATIN_1;
|
||||
driver.encoding = Fortran::parser::Encoding::LATIN_1;
|
||||
} else if (arg == "-help" || arg == "--help" || arg == "-?") {
|
||||
std::cerr
|
||||
<< "f18 options:\n"
|
||||
|
@ -496,11 +496,10 @@ int main(int argc, char *const argv[]) {
|
|||
} else if (arg.substr(0, 2) == "-I") {
|
||||
driver.searchDirectories.push_back(arg.substr(2));
|
||||
} else if (arg == "-Mx,125,4") { // PGI "all Kanji" mode
|
||||
options.encoding = Fortran::parser::Encoding::EUC_JP;
|
||||
driver.encoding = Fortran::parser::Encoding::EUC_JP;
|
||||
}
|
||||
}
|
||||
}
|
||||
driver.encoding = options.encoding;
|
||||
|
||||
if (driver.warnOnNonstandardUsage) {
|
||||
options.features.WarnOnAllNonstandard();
|
||||
|
@ -514,6 +513,7 @@ int main(int argc, char *const argv[]) {
|
|||
}
|
||||
|
||||
Fortran::parser::AllSources allSources;
|
||||
allSources.set_encoding(driver.encoding);
|
||||
Fortran::semantics::SemanticsContext semanticsContext{
|
||||
defaultKinds, options.features, allSources};
|
||||
semanticsContext.set_moduleDirectory(driver.moduleDirectory)
|
||||
|
|
Loading…
Reference in New Issue