forked from OSchip/llvm-project
[flang] Initial UTF-8 support in runtime I/O
Implements UTF-8 encoding and decoding for external units with OPEN(ENCODING='UTF-8'). This encoding applies to default CHARACTER values that are not 7-bit ASCII as well as to the wide CHARACTER kinds 2 and 4. Basic testing is in place via direct calls to the runtime I/O APIs, but serious checkout awaits lowering support of the wide CHARACTER kinds. Differential Revision: https://reviews.llvm.org/D122038
This commit is contained in:
parent
54d19ba208
commit
bafbae238a
|
@ -66,6 +66,7 @@ enum Iostat {
|
|||
IostatShortRead,
|
||||
IostatMissingTerminator,
|
||||
IostatBadUnformattedRecord,
|
||||
IostatUTF8Decoding,
|
||||
};
|
||||
|
||||
const char *IostatErrorString(int);
|
||||
|
|
|
@ -82,6 +82,7 @@ add_flang_library(FortranRuntime
|
|||
type-info.cpp
|
||||
unit.cpp
|
||||
unit-map.cpp
|
||||
utf.cpp
|
||||
|
||||
LINK_LIBS
|
||||
FortranDecimal
|
||||
|
|
|
@ -168,17 +168,17 @@ inline bool FormattedCharacterIO(
|
|||
for (std::size_t j{0}; j < numElements; ++j) {
|
||||
A *x{&ExtractElement<A>(io, descriptor, subscripts)};
|
||||
if (listOutput) {
|
||||
if (!ListDirectedDefaultCharacterOutput(io, *listOutput, x, length)) {
|
||||
if (!ListDirectedCharacterOutput(io, *listOutput, x, length)) {
|
||||
return false;
|
||||
}
|
||||
} else if (auto edit{io.GetNextDataEdit()}) {
|
||||
if constexpr (DIR == Direction::Output) {
|
||||
if (!EditDefaultCharacterOutput(io, *edit, x, length)) {
|
||||
if (!EditCharacterOutput(io, *edit, x, length)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (edit->descriptor != DataEdit::ListDirectedNullValue) {
|
||||
if (EditDefaultCharacterInput(io, *edit, x, length)) {
|
||||
if (EditCharacterInput(io, *edit, x, length)) {
|
||||
anyInput = true;
|
||||
} else {
|
||||
return anyInput && edit->IsNamelist();
|
||||
|
@ -456,7 +456,10 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) {
|
|||
switch (kind) {
|
||||
case 1:
|
||||
return FormattedCharacterIO<char, DIR>(io, descriptor);
|
||||
// TODO cases 2, 4
|
||||
case 2:
|
||||
return FormattedCharacterIO<char16_t, DIR>(io, descriptor);
|
||||
case 4:
|
||||
return FormattedCharacterIO<char32_t, DIR>(io, descriptor);
|
||||
default:
|
||||
handler.Crash(
|
||||
"DescriptorIO: Unimplemented CHARACTER kind (%d) in descriptor",
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "edit-input.h"
|
||||
#include "namelist.h"
|
||||
#include "utf.h"
|
||||
#include "flang/Common/real.h"
|
||||
#include "flang/Common/uint128.h"
|
||||
#include <algorithm>
|
||||
|
@ -61,7 +62,6 @@ static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
|
|||
if (next) {
|
||||
negative = *next == '-';
|
||||
if (negative || *next == '+') {
|
||||
io.GotChar();
|
||||
io.SkipSpaces(remaining);
|
||||
next = io.NextInField(remaining, edit);
|
||||
}
|
||||
|
@ -88,8 +88,7 @@ bool EditIntegerInput(
|
|||
case 'Z':
|
||||
return EditBOZInput(io, edit, n, 16, kind << 3);
|
||||
case 'A': // legacy extension
|
||||
return EditDefaultCharacterInput(
|
||||
io, edit, reinterpret_cast<char *>(n), kind);
|
||||
return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
|
||||
default:
|
||||
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
|
||||
"Data edit descriptor '%c' may not be used with an INTEGER data item",
|
||||
|
@ -260,9 +259,10 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
|
|||
next = io.NextInField(remaining, edit);
|
||||
}
|
||||
if (!next) { // NextInField fails on separators like ')'
|
||||
next = io.GetCurrentChar();
|
||||
std::size_t byteCount{0};
|
||||
next = io.GetCurrentChar(byteCount);
|
||||
if (next && *next == ')') {
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
}
|
||||
}
|
||||
} else if (remaining) {
|
||||
|
@ -427,8 +427,7 @@ bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
|
|||
return EditBOZInput(
|
||||
io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
|
||||
case 'A': // legacy extension
|
||||
return EditDefaultCharacterInput(
|
||||
io, edit, reinterpret_cast<char *>(n), KIND);
|
||||
return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
|
||||
default:
|
||||
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
|
||||
"Data edit descriptor '%c' may not be used for REAL input",
|
||||
|
@ -487,11 +486,13 @@ bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
|
|||
}
|
||||
|
||||
// See 13.10.3.1 paragraphs 7-9 in Fortran 2018
|
||||
template <typename CHAR>
|
||||
static bool EditDelimitedCharacterInput(
|
||||
IoStatementState &io, char *x, std::size_t length, char32_t delimiter) {
|
||||
IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
|
||||
bool result{true};
|
||||
while (true) {
|
||||
auto ch{io.GetCurrentChar()};
|
||||
std::size_t byteCount{0};
|
||||
auto ch{io.GetCurrentChar(byteCount)};
|
||||
if (!ch) {
|
||||
if (io.AdvanceRecord()) {
|
||||
continue;
|
||||
|
@ -500,12 +501,12 @@ static bool EditDelimitedCharacterInput(
|
|||
break;
|
||||
}
|
||||
}
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
if (*ch == delimiter) {
|
||||
auto next{io.GetCurrentChar()};
|
||||
auto next{io.GetCurrentChar(byteCount)};
|
||||
if (next && *next == delimiter) {
|
||||
// Repeated delimiter: use as character value
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
} else {
|
||||
break; // closing delimiter
|
||||
}
|
||||
|
@ -519,19 +520,23 @@ static bool EditDelimitedCharacterInput(
|
|||
return result;
|
||||
}
|
||||
|
||||
static bool EditListDirectedDefaultCharacterInput(
|
||||
IoStatementState &io, char *x, std::size_t length, const DataEdit &edit) {
|
||||
auto ch{io.GetCurrentChar()};
|
||||
template <typename CHAR>
|
||||
static bool EditListDirectedCharacterInput(
|
||||
IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
|
||||
std::size_t byteCount{0};
|
||||
auto ch{io.GetCurrentChar(byteCount)};
|
||||
if (ch && (*ch == '\'' || *ch == '"')) {
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
return EditDelimitedCharacterInput(io, x, length, *ch);
|
||||
}
|
||||
if (IsNamelistName(io) || io.GetConnectionState().IsAtEOF()) {
|
||||
return false;
|
||||
}
|
||||
// Undelimited list-directed character input: stop at a value separator
|
||||
// or the end of the current record.
|
||||
std::optional<int> remaining{length};
|
||||
// or the end of the current record. Subtlety: the "remaining" count
|
||||
// here is a dummy that's used to avoid the interpretation of separators
|
||||
// in NextInField.
|
||||
std::optional<int> remaining{maxUTF8Bytes};
|
||||
while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
|
||||
switch (*next) {
|
||||
case ' ':
|
||||
|
@ -544,17 +549,19 @@ static bool EditListDirectedDefaultCharacterInput(
|
|||
default:
|
||||
*x++ = *next;
|
||||
--length;
|
||||
remaining = maxUTF8Bytes;
|
||||
}
|
||||
}
|
||||
std::fill_n(x, length, ' ');
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EditDefaultCharacterInput(
|
||||
IoStatementState &io, const DataEdit &edit, char *x, std::size_t length) {
|
||||
template <typename CHAR>
|
||||
bool EditCharacterInput(
|
||||
IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
|
||||
switch (edit.descriptor) {
|
||||
case DataEdit::ListDirected:
|
||||
return EditListDirectedDefaultCharacterInput(io, x, length, edit);
|
||||
return EditListDirectedCharacterInput(io, x, length, edit);
|
||||
case 'A':
|
||||
case 'G':
|
||||
break;
|
||||
|
@ -564,7 +571,8 @@ bool EditDefaultCharacterInput(
|
|||
edit.descriptor);
|
||||
return false;
|
||||
}
|
||||
if (io.GetConnectionState().IsAtEOF()) {
|
||||
const ConnectionState &connection{io.GetConnectionState()};
|
||||
if (connection.IsAtEOF()) {
|
||||
return false;
|
||||
}
|
||||
std::size_t remaining{length};
|
||||
|
@ -577,26 +585,9 @@ bool EditDefaultCharacterInput(
|
|||
const char *input{nullptr};
|
||||
std::size_t ready{0};
|
||||
bool hitEnd{false};
|
||||
if (remaining > length) {
|
||||
// Discard leading bytes.
|
||||
// These bytes don't count towards INQUIRE(IOLENGTH=).
|
||||
std::size_t skip{remaining - length};
|
||||
do {
|
||||
if (ready == 0) {
|
||||
ready = io.GetNextInputBytes(input);
|
||||
if (ready == 0) {
|
||||
hitEnd = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
std::size_t chunk{std::min<std::size_t>(skip, ready)};
|
||||
io.HandleRelativePosition(chunk);
|
||||
ready -= chunk;
|
||||
input += chunk;
|
||||
skip -= chunk;
|
||||
} while (skip > 0);
|
||||
remaining = length;
|
||||
}
|
||||
// Skip leading bytes.
|
||||
// These bytes don't count towards INQUIRE(IOLENGTH=).
|
||||
std::size_t skip{remaining > length ? remaining - length : 0};
|
||||
// Transfer payload bytes; these do count.
|
||||
while (remaining > 0) {
|
||||
if (ready == 0) {
|
||||
|
@ -606,18 +597,41 @@ bool EditDefaultCharacterInput(
|
|||
break;
|
||||
}
|
||||
}
|
||||
std::size_t chunk{std::min<std::size_t>(remaining, ready)};
|
||||
std::memcpy(x, input, chunk);
|
||||
x += chunk;
|
||||
std::size_t chunk;
|
||||
bool skipping{skip > 0};
|
||||
if (connection.isUTF8) {
|
||||
chunk = MeasureUTF8Bytes(*input);
|
||||
if (skipping) {
|
||||
--skip;
|
||||
} else if (auto ucs{DecodeUTF8(input)}) {
|
||||
*x++ = *ucs;
|
||||
--length;
|
||||
} else if (chunk == 0) {
|
||||
// error recovery: skip bad encoding
|
||||
chunk = 1;
|
||||
}
|
||||
--remaining;
|
||||
} else {
|
||||
if (skipping) {
|
||||
chunk = std::min<std::size_t>(skip, ready);
|
||||
skip -= chunk;
|
||||
} else {
|
||||
chunk = std::min<std::size_t>(remaining, ready);
|
||||
std::memcpy(x, input, chunk);
|
||||
x += chunk;
|
||||
length -= chunk;
|
||||
}
|
||||
remaining -= chunk;
|
||||
}
|
||||
input += chunk;
|
||||
io.GotChar(chunk);
|
||||
if (!skipping) {
|
||||
io.GotChar(chunk);
|
||||
}
|
||||
io.HandleRelativePosition(chunk);
|
||||
ready -= chunk;
|
||||
remaining -= chunk;
|
||||
length -= chunk;
|
||||
}
|
||||
// Pad the remainder of the input variable, if any.
|
||||
std::memset(x, ' ', length);
|
||||
std::fill_n(x, length, ' ');
|
||||
if (hitEnd) {
|
||||
io.CheckForEndOfRecord(); // signal any needed error
|
||||
}
|
||||
|
@ -631,4 +645,12 @@ template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
|
|||
template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
|
||||
// TODO: double/double
|
||||
template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
|
||||
|
||||
template bool EditCharacterInput(
|
||||
IoStatementState &, const DataEdit &, char *, std::size_t);
|
||||
template bool EditCharacterInput(
|
||||
IoStatementState &, const DataEdit &, char16_t *, std::size_t);
|
||||
template bool EditCharacterInput(
|
||||
IoStatementState &, const DataEdit &, char32_t *, std::size_t);
|
||||
|
||||
} // namespace Fortran::runtime::io
|
||||
|
|
|
@ -21,8 +21,10 @@ template <int KIND>
|
|||
bool EditRealInput(IoStatementState &, const DataEdit &, void *);
|
||||
|
||||
bool EditLogicalInput(IoStatementState &, const DataEdit &, bool &);
|
||||
bool EditDefaultCharacterInput(
|
||||
IoStatementState &, const DataEdit &, char *, std::size_t);
|
||||
|
||||
template <typename CHAR>
|
||||
bool EditCharacterInput(
|
||||
IoStatementState &, const DataEdit &, CHAR *, std::size_t);
|
||||
|
||||
extern template bool EditRealInput<2>(
|
||||
IoStatementState &, const DataEdit &, void *);
|
||||
|
@ -37,5 +39,13 @@ extern template bool EditRealInput<10>(
|
|||
// TODO: double/double
|
||||
extern template bool EditRealInput<16>(
|
||||
IoStatementState &, const DataEdit &, void *);
|
||||
|
||||
extern template bool EditCharacterInput(
|
||||
IoStatementState &, const DataEdit &, char *, std::size_t);
|
||||
extern template bool EditCharacterInput(
|
||||
IoStatementState &, const DataEdit &, char16_t *, std::size_t);
|
||||
extern template bool EditCharacterInput(
|
||||
IoStatementState &, const DataEdit &, char32_t *, std::size_t);
|
||||
|
||||
} // namespace Fortran::runtime::io
|
||||
#endif // FORTRAN_RUNTIME_EDIT_INPUT_H_
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "edit-output.h"
|
||||
#include "utf.h"
|
||||
#include "flang/Common/uint128.h"
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -53,7 +54,7 @@ bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
|
|||
}
|
||||
break;
|
||||
case 'A': // legacy extension
|
||||
return EditDefaultCharacterOutput(
|
||||
return EditCharacterOutput(
|
||||
io, edit, reinterpret_cast<char *>(&n), sizeof n);
|
||||
default:
|
||||
io.GetIoErrorHandler().Crash(
|
||||
|
@ -434,7 +435,7 @@ template <int KIND> bool RealOutputEditing<KIND>::Edit(const DataEdit &edit) {
|
|||
case 'G':
|
||||
return Edit(EditForGOutput(edit));
|
||||
case 'A': // legacy extension
|
||||
return EditDefaultCharacterOutput(
|
||||
return EditCharacterOutput(
|
||||
io_, edit, reinterpret_cast<char *>(&x_), sizeof x_);
|
||||
default:
|
||||
if (edit.IsListDirected()) {
|
||||
|
@ -467,8 +468,9 @@ bool EditLogicalOutput(IoStatementState &io, const DataEdit &edit, bool truth) {
|
|||
}
|
||||
}
|
||||
|
||||
bool ListDirectedDefaultCharacterOutput(IoStatementState &io,
|
||||
ListDirectedStatementState<Direction::Output> &list, const char *x,
|
||||
template <typename CHAR>
|
||||
bool ListDirectedCharacterOutput(IoStatementState &io,
|
||||
ListDirectedStatementState<Direction::Output> &list, const CHAR *x,
|
||||
std::size_t length) {
|
||||
bool ok{true};
|
||||
MutableModes &modes{io.mutableModes()};
|
||||
|
@ -477,11 +479,11 @@ bool ListDirectedDefaultCharacterOutput(IoStatementState &io,
|
|||
ok = ok && list.EmitLeadingSpaceOrAdvance(io);
|
||||
// Value is delimited with ' or " marks, and interior
|
||||
// instances of that character are doubled.
|
||||
auto EmitOne{[&](char ch) {
|
||||
auto EmitOne{[&](CHAR ch) {
|
||||
if (connection.NeedAdvance(1)) {
|
||||
ok = ok && io.AdvanceRecord();
|
||||
}
|
||||
ok = ok && io.Emit(&ch, 1);
|
||||
ok = ok && io.EmitEncoded(&ch, 1);
|
||||
}};
|
||||
EmitOne(modes.delim);
|
||||
for (std::size_t j{0}; j < length; ++j) {
|
||||
|
@ -494,7 +496,7 @@ bool ListDirectedDefaultCharacterOutput(IoStatementState &io,
|
|||
// the same thing when tested with this case.
|
||||
// This runtime splits the doubled delimiters across
|
||||
// two records for lack of a better alternative.
|
||||
if (x[j] == modes.delim) {
|
||||
if (x[j] == static_cast<CHAR>(modes.delim)) {
|
||||
EmitOne(x[j]);
|
||||
}
|
||||
EmitOne(x[j]);
|
||||
|
@ -504,12 +506,15 @@ bool ListDirectedDefaultCharacterOutput(IoStatementState &io,
|
|||
// Undelimited list-directed output
|
||||
ok = ok && list.EmitLeadingSpaceOrAdvance(io, length > 0 ? 1 : 0, true);
|
||||
std::size_t put{0};
|
||||
std::size_t oneIfUTF8{connection.isUTF8 ? 1 : length};
|
||||
while (ok && put < length) {
|
||||
auto chunk{std::min(length - put, connection.RemainingSpaceInRecord())};
|
||||
ok = ok && io.Emit(x + put, chunk);
|
||||
put += chunk;
|
||||
if (put < length) {
|
||||
ok = ok && io.AdvanceRecord() && io.Emit(" ", 1);
|
||||
if (std::size_t chunk{std::min<std::size_t>(
|
||||
std::min<std::size_t>(length - put, oneIfUTF8),
|
||||
connection.RemainingSpaceInRecord())}) {
|
||||
ok = io.EmitEncoded(x + put, chunk);
|
||||
put += chunk;
|
||||
} else {
|
||||
ok = io.AdvanceRecord() && io.Emit(" ", 1);
|
||||
}
|
||||
}
|
||||
list.set_lastWasUndelimitedCharacter(true);
|
||||
|
@ -517,8 +522,9 @@ bool ListDirectedDefaultCharacterOutput(IoStatementState &io,
|
|||
return ok;
|
||||
}
|
||||
|
||||
bool EditDefaultCharacterOutput(IoStatementState &io, const DataEdit &edit,
|
||||
const char *x, std::size_t length) {
|
||||
template <typename CHAR>
|
||||
bool EditCharacterOutput(IoStatementState &io, const DataEdit &edit,
|
||||
const CHAR *x, std::size_t length) {
|
||||
switch (edit.descriptor) {
|
||||
case 'A':
|
||||
case 'G':
|
||||
|
@ -532,7 +538,7 @@ bool EditDefaultCharacterOutput(IoStatementState &io, const DataEdit &edit,
|
|||
int len{static_cast<int>(length)};
|
||||
int width{edit.width.value_or(len)};
|
||||
return io.EmitRepeated(' ', std::max(0, width - len)) &&
|
||||
io.Emit(x, std::min(width, len));
|
||||
io.EmitEncoded(x, std::min(width, len));
|
||||
}
|
||||
|
||||
template bool EditIntegerOutput<1>(
|
||||
|
@ -553,4 +559,22 @@ template class RealOutputEditing<8>;
|
|||
template class RealOutputEditing<10>;
|
||||
// TODO: double/double
|
||||
template class RealOutputEditing<16>;
|
||||
|
||||
template bool ListDirectedCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const char *,
|
||||
std::size_t chars);
|
||||
template bool ListDirectedCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const char16_t *,
|
||||
std::size_t chars);
|
||||
template bool ListDirectedCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const char32_t *,
|
||||
std::size_t chars);
|
||||
|
||||
template bool EditCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const char *, std::size_t chars);
|
||||
template bool EditCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const char16_t *, std::size_t chars);
|
||||
template bool EditCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const char32_t *, std::size_t chars);
|
||||
|
||||
} // namespace Fortran::runtime::io
|
||||
|
|
|
@ -94,10 +94,30 @@ private:
|
|||
bool ListDirectedLogicalOutput(
|
||||
IoStatementState &, ListDirectedStatementState<Direction::Output> &, bool);
|
||||
bool EditLogicalOutput(IoStatementState &, const DataEdit &, bool);
|
||||
bool ListDirectedDefaultCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const char *, std::size_t);
|
||||
bool EditDefaultCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const char *, std::size_t);
|
||||
|
||||
template <typename CHAR>
|
||||
bool ListDirectedCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const CHAR *,
|
||||
std::size_t chars);
|
||||
extern template bool ListDirectedCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const char *,
|
||||
std::size_t chars);
|
||||
extern template bool ListDirectedCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const char16_t *,
|
||||
std::size_t chars);
|
||||
extern template bool ListDirectedCharacterOutput(IoStatementState &,
|
||||
ListDirectedStatementState<Direction::Output> &, const char32_t *,
|
||||
std::size_t chars);
|
||||
|
||||
template <typename CHAR>
|
||||
bool EditCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const CHAR *, std::size_t chars);
|
||||
extern template bool EditCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const char *, std::size_t chars);
|
||||
extern template bool EditCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const char16_t *, std::size_t chars);
|
||||
extern template bool EditCharacterOutput(
|
||||
IoStatementState &, const DataEdit &, const char32_t *, std::size_t chars);
|
||||
|
||||
extern template bool EditIntegerOutput<1>(
|
||||
IoStatementState &, const DataEdit &, std::int8_t);
|
||||
|
|
|
@ -78,6 +78,17 @@ void ExecutionEnvironment::Configure(
|
|||
}
|
||||
}
|
||||
|
||||
if (auto *x{std::getenv("DEFAULT_UTF8")}) {
|
||||
char *end;
|
||||
auto n{std::strtol(x, &end, 10)};
|
||||
if (n >= 0 && n <= 1 && *end == '\0') {
|
||||
defaultUTF8 = n != 0;
|
||||
} else {
|
||||
std::fprintf(
|
||||
stderr, "Fortran runtime: DEFAULT_UTF8=%s is invalid; ignored\n", x);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
|
||||
}
|
||||
|
||||
|
|
|
@ -30,19 +30,23 @@ enum class Convert { Unknown, Native, LittleEndian, BigEndian, Swap };
|
|||
std::optional<Convert> GetConvertFromString(const char *, std::size_t);
|
||||
|
||||
struct ExecutionEnvironment {
|
||||
constexpr ExecutionEnvironment(){};
|
||||
void Configure(int argc, const char *argv[], const char *envp[]);
|
||||
const char *GetEnv(
|
||||
const char *name, std::size_t name_length, const Terminator &terminator);
|
||||
|
||||
int argc;
|
||||
const char **argv;
|
||||
const char **envp;
|
||||
int argc{0};
|
||||
const char **argv{nullptr};
|
||||
const char **envp{nullptr};
|
||||
|
||||
int listDirectedOutputLineLengthLimit; // FORT_FMT_RECL
|
||||
enum decimal::FortranRounding defaultOutputRoundingMode;
|
||||
Convert conversion; // FORT_CONVERT
|
||||
bool noStopMessage; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
|
||||
int listDirectedOutputLineLengthLimit{79}; // FORT_FMT_RECL
|
||||
enum decimal::FortranRounding defaultOutputRoundingMode{
|
||||
decimal::FortranRounding::RoundNearest}; // RP(==PN)
|
||||
Convert conversion{Convert::Unknown}; // FORT_CONVERT
|
||||
bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
|
||||
bool defaultUTF8{false}; // DEFAULT_UTF8
|
||||
};
|
||||
|
||||
extern ExecutionEnvironment executionEnvironment;
|
||||
} // namespace Fortran::runtime
|
||||
|
||||
|
|
|
@ -102,21 +102,6 @@ std::size_t InternalDescriptorUnit<DIR>::GetNextInputBytes(
|
|||
}
|
||||
}
|
||||
|
||||
template <Direction DIR>
|
||||
std::optional<char32_t> InternalDescriptorUnit<DIR>::GetCurrentChar(
|
||||
IoErrorHandler &handler) {
|
||||
const char *p{nullptr};
|
||||
std::size_t bytes{GetNextInputBytes(p, handler)};
|
||||
if (bytes == 0) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
if (isUTF8) {
|
||||
// TODO: UTF-8 decoding
|
||||
}
|
||||
return *p;
|
||||
}
|
||||
}
|
||||
|
||||
template <Direction DIR>
|
||||
bool InternalDescriptorUnit<DIR>::AdvanceRecord(IoErrorHandler &handler) {
|
||||
if (currentRecordNumber >= endfileRecordNumber.value_or(0)) {
|
||||
|
|
|
@ -32,7 +32,6 @@ public:
|
|||
|
||||
bool Emit(const char *, std::size_t, IoErrorHandler &);
|
||||
std::size_t GetNextInputBytes(const char *&, IoErrorHandler &);
|
||||
std::optional<char32_t> GetCurrentChar(IoErrorHandler &);
|
||||
bool AdvanceRecord(IoErrorHandler &);
|
||||
void BackspaceRecord(IoErrorHandler &);
|
||||
|
||||
|
|
|
@ -11,11 +11,13 @@
|
|||
#include "format.h"
|
||||
#include "tools.h"
|
||||
#include "unit.h"
|
||||
#include "utf.h"
|
||||
#include "flang/Runtime/memory.h"
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Fortran::runtime::io {
|
||||
|
||||
|
@ -357,7 +359,6 @@ bool ExternalIoStatementState<DIR>::Emit(
|
|||
Crash(
|
||||
"ExternalIoStatementState::Emit(char16_t) called for input statement");
|
||||
}
|
||||
// TODO: UTF-8 encoding
|
||||
return unit().Emit(reinterpret_cast<const char *>(data), chars * sizeof *data,
|
||||
sizeof *data, *this);
|
||||
}
|
||||
|
@ -369,7 +370,6 @@ bool ExternalIoStatementState<DIR>::Emit(
|
|||
Crash(
|
||||
"ExternalIoStatementState::Emit(char32_t) called for input statement");
|
||||
}
|
||||
// TODO: UTF-8 encoding
|
||||
return unit().Emit(reinterpret_cast<const char *>(data), chars * sizeof *data,
|
||||
sizeof *data, *this);
|
||||
}
|
||||
|
@ -472,6 +472,30 @@ bool IoStatementState::Emit(const char32_t *data, std::size_t chars) {
|
|||
return std::visit([=](auto &x) { return x.get().Emit(data, chars); }, u_);
|
||||
}
|
||||
|
||||
template <typename CHAR>
|
||||
bool IoStatementState::EmitEncoded(const CHAR *data0, std::size_t chars) {
|
||||
// Don't allow sign extension
|
||||
using UnsignedChar = std::make_unsigned_t<CHAR>;
|
||||
const UnsignedChar *data{reinterpret_cast<const UnsignedChar *>(data0)};
|
||||
if (GetConnectionState().isUTF8) {
|
||||
char buffer[256];
|
||||
std::size_t at{0};
|
||||
while (chars-- > 0) {
|
||||
auto len{EncodeUTF8(buffer + at, *data++)};
|
||||
at += len;
|
||||
if (at + maxUTF8Bytes > sizeof buffer) {
|
||||
if (!Emit(buffer, at)) {
|
||||
return false;
|
||||
}
|
||||
at = 0;
|
||||
}
|
||||
}
|
||||
return at == 0 || Emit(buffer, at);
|
||||
} else {
|
||||
return Emit(data0, chars);
|
||||
}
|
||||
}
|
||||
|
||||
bool IoStatementState::Receive(
|
||||
char *data, std::size_t n, std::size_t elementBytes) {
|
||||
return std::visit(
|
||||
|
@ -533,6 +557,30 @@ ExternalFileUnit *IoStatementState::GetExternalFileUnit() const {
|
|||
return std::visit([](auto &x) { return x.get().GetExternalFileUnit(); }, u_);
|
||||
}
|
||||
|
||||
std::optional<char32_t> IoStatementState::GetCurrentChar(
|
||||
std::size_t &byteCount) {
|
||||
const char *p{nullptr};
|
||||
std::size_t bytes{GetNextInputBytes(p)};
|
||||
if (bytes == 0) {
|
||||
byteCount = 0;
|
||||
return std::nullopt;
|
||||
} else {
|
||||
if (GetConnectionState().isUTF8) {
|
||||
std::size_t length{MeasureUTF8Bytes(*p)};
|
||||
if (length <= bytes) {
|
||||
if (auto result{DecodeUTF8(p)}) {
|
||||
byteCount = length;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
GetIoErrorHandler().SignalError(IostatUTF8Decoding);
|
||||
// Error recovery: return the next byte
|
||||
}
|
||||
byteCount = 1;
|
||||
return *p;
|
||||
}
|
||||
}
|
||||
|
||||
bool IoStatementState::EmitRepeated(char ch, std::size_t n) {
|
||||
return std::visit(
|
||||
[=](auto &x) {
|
||||
|
@ -561,8 +609,9 @@ bool IoStatementState::EmitField(
|
|||
|
||||
std::optional<char32_t> IoStatementState::NextInField(
|
||||
std::optional<int> &remaining, const DataEdit &edit) {
|
||||
std::size_t byteCount{0};
|
||||
if (!remaining) { // Stream, list-directed, or NAMELIST
|
||||
if (auto next{GetCurrentChar()}) {
|
||||
if (auto next{GetCurrentChar(byteCount)}) {
|
||||
if (edit.IsListDirected()) {
|
||||
// list-directed or NAMELIST: check for separators
|
||||
switch (*next) {
|
||||
|
@ -587,15 +636,18 @@ std::optional<char32_t> IoStatementState::NextInField(
|
|||
break;
|
||||
}
|
||||
}
|
||||
HandleRelativePosition(1);
|
||||
GotChar();
|
||||
HandleRelativePosition(byteCount);
|
||||
GotChar(byteCount);
|
||||
return next;
|
||||
}
|
||||
} else if (*remaining > 0) {
|
||||
if (auto next{GetCurrentChar()}) {
|
||||
--*remaining;
|
||||
HandleRelativePosition(1);
|
||||
GotChar();
|
||||
if (auto next{GetCurrentChar(byteCount)}) {
|
||||
if (byteCount > static_cast<std::size_t>(*remaining)) {
|
||||
return std::nullopt;
|
||||
}
|
||||
*remaining -= byteCount;
|
||||
HandleRelativePosition(byteCount);
|
||||
GotChar(byteCount);
|
||||
return next;
|
||||
}
|
||||
if (CheckForEndOfRecord()) { // do padding
|
||||
|
@ -708,12 +760,13 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
|
|||
if (edit.modes.editingFlags & decimalComma) {
|
||||
comma = ';';
|
||||
}
|
||||
std::size_t byteCount{0};
|
||||
if (remaining_ > 0 && !realPart_) { // "r*c" repetition in progress
|
||||
RUNTIME_CHECK(io.GetIoErrorHandler(), repeatPosition_.has_value());
|
||||
repeatPosition_.reset(); // restores the saved position
|
||||
if (!imaginaryPart_) {
|
||||
edit.repeat = std::min<int>(remaining_, maxRepeat);
|
||||
auto ch{io.GetCurrentChar()};
|
||||
auto ch{io.GetCurrentChar(byteCount)};
|
||||
if (!ch || *ch == ' ' || *ch == '\t' || *ch == comma) {
|
||||
// "r*" repeated null
|
||||
edit.descriptor = DataEdit::ListDirectedNullValue;
|
||||
|
@ -733,14 +786,14 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
|
|||
imaginaryPart_ = true;
|
||||
edit.descriptor = DataEdit::ListDirectedImaginaryPart;
|
||||
}
|
||||
auto ch{io.GetNextNonBlank()};
|
||||
auto ch{io.GetNextNonBlank(byteCount)};
|
||||
if (ch && *ch == comma && eatComma_) {
|
||||
// Consume comma & whitespace after previous item.
|
||||
// This includes the comma between real and imaginary components
|
||||
// in list-directed/NAMELIST complex input.
|
||||
// (When DECIMAL='COMMA', the comma is actually a semicolon.)
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetNextNonBlank();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
eatComma_ = true;
|
||||
if (!ch) {
|
||||
|
@ -768,12 +821,12 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
|
|||
break;
|
||||
}
|
||||
r = 10 * r + (*ch - '0');
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetCurrentChar();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetCurrentChar(byteCount);
|
||||
} while (ch && *ch >= '0' && *ch <= '9');
|
||||
if (r > 0 && ch && *ch == '*') { // subtle: r must be nonzero
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetCurrentChar();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetCurrentChar(byteCount);
|
||||
if (ch && *ch == '/') { // r*/
|
||||
hitSlash_ = true;
|
||||
edit.descriptor = DataEdit::ListDirectedNullValue;
|
||||
|
@ -793,7 +846,7 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
|
|||
}
|
||||
if (!imaginaryPart_ && ch && *ch == '(') {
|
||||
realPart_ = true;
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
edit.descriptor = DataEdit::ListDirectedRealPart;
|
||||
}
|
||||
return edit;
|
||||
|
@ -1445,4 +1498,10 @@ int ErroneousIoStatementState::EndIoStatement() {
|
|||
return IoStatementBase::EndIoStatement();
|
||||
}
|
||||
|
||||
template bool IoStatementState::EmitEncoded<char>(const char *, std::size_t);
|
||||
template bool IoStatementState::EmitEncoded<char16_t>(
|
||||
const char16_t *, std::size_t);
|
||||
template bool IoStatementState::EmitEncoded<char32_t>(
|
||||
const char32_t *, std::size_t);
|
||||
|
||||
} // namespace Fortran::runtime::io
|
||||
|
|
|
@ -90,6 +90,7 @@ public:
|
|||
bool Emit(const char *, std::size_t);
|
||||
bool Emit(const char16_t *, std::size_t chars);
|
||||
bool Emit(const char32_t *, std::size_t chars);
|
||||
template <typename CHAR> bool EmitEncoded(const CHAR *, std::size_t);
|
||||
bool Receive(char *, std::size_t, std::size_t elementBytes = 0);
|
||||
std::size_t GetNextInputBytes(const char *&);
|
||||
bool AdvanceRecord(int = 1);
|
||||
|
@ -123,16 +124,7 @@ public:
|
|||
}
|
||||
|
||||
// Vacant after the end of the current record
|
||||
std::optional<char32_t> GetCurrentChar() {
|
||||
const char *p{nullptr};
|
||||
std::size_t bytes{GetNextInputBytes(p)};
|
||||
if (bytes == 0) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
// TODO: UTF-8 decoding; may have to get more bytes in a loop
|
||||
return *p;
|
||||
}
|
||||
}
|
||||
std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);
|
||||
|
||||
bool EmitRepeated(char, std::size_t);
|
||||
bool EmitField(const char *, std::size_t length, std::size_t width);
|
||||
|
@ -144,7 +136,8 @@ public:
|
|||
const DataEdit &edit, std::optional<int> &remaining) {
|
||||
remaining.reset();
|
||||
if (edit.descriptor == DataEdit::ListDirected) {
|
||||
GetNextNonBlank();
|
||||
std::size_t byteCount{0};
|
||||
GetNextNonBlank(byteCount);
|
||||
} else {
|
||||
if (edit.width.value_or(0) > 0) {
|
||||
remaining = *edit.width;
|
||||
|
@ -156,15 +149,19 @@ public:
|
|||
|
||||
std::optional<char32_t> SkipSpaces(std::optional<int> &remaining) {
|
||||
while (!remaining || *remaining > 0) {
|
||||
if (auto ch{GetCurrentChar()}) {
|
||||
std::size_t byteCount{0};
|
||||
if (auto ch{GetCurrentChar(byteCount)}) {
|
||||
if (*ch != ' ' && *ch != '\t') {
|
||||
return ch;
|
||||
}
|
||||
HandleRelativePosition(1);
|
||||
if (remaining) {
|
||||
GotChar();
|
||||
--*remaining;
|
||||
if (static_cast<std::size_t>(*remaining) < byteCount) {
|
||||
break;
|
||||
}
|
||||
GotChar(byteCount);
|
||||
*remaining -= byteCount;
|
||||
}
|
||||
HandleRelativePosition(byteCount);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -182,16 +179,16 @@ public:
|
|||
bool CheckForEndOfRecord();
|
||||
|
||||
// Skips spaces, advances records, and ignores NAMELIST comments
|
||||
std::optional<char32_t> GetNextNonBlank() {
|
||||
auto ch{GetCurrentChar()};
|
||||
std::optional<char32_t> GetNextNonBlank(std::size_t &byteCount) {
|
||||
auto ch{GetCurrentChar(byteCount)};
|
||||
bool inNamelist{mutableModes().inNamelist};
|
||||
while (!ch || *ch == ' ' || *ch == '\t' || (inNamelist && *ch == '!')) {
|
||||
if (ch && (*ch == ' ' || *ch == '\t')) {
|
||||
HandleRelativePosition(1);
|
||||
HandleRelativePosition(byteCount);
|
||||
} else if (!AdvanceRecord()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
ch = GetCurrentChar();
|
||||
ch = GetCurrentChar(byteCount);
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
@ -721,5 +718,12 @@ private:
|
|||
ConnectionState connection_;
|
||||
};
|
||||
|
||||
extern template bool IoStatementState::EmitEncoded<char>(
|
||||
const char *, std::size_t);
|
||||
extern template bool IoStatementState::EmitEncoded<char16_t>(
|
||||
const char16_t *, std::size_t);
|
||||
extern template bool IoStatementState::EmitEncoded<char32_t>(
|
||||
const char32_t *, std::size_t);
|
||||
|
||||
} // namespace Fortran::runtime::io
|
||||
#endif // FORTRAN_RUNTIME_IO_STMT_H_
|
||||
|
|
|
@ -75,6 +75,8 @@ const char *IostatErrorString(int iostat) {
|
|||
return "Sequential record missing its terminator";
|
||||
case IostatBadUnformattedRecord:
|
||||
return "Erroneous unformatted sequential file record structure";
|
||||
case IostatUTF8Decoding:
|
||||
return "UTF-8 decoding error";
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -86,13 +86,14 @@ static constexpr char NormalizeIdChar(char32_t ch) {
|
|||
|
||||
static bool GetLowerCaseName(
|
||||
IoStatementState &io, char buffer[], std::size_t maxLength) {
|
||||
if (auto ch{io.GetNextNonBlank()}) {
|
||||
std::size_t byteLength{0};
|
||||
if (auto ch{io.GetNextNonBlank(byteLength)}) {
|
||||
if (IsLegalIdStart(*ch)) {
|
||||
std::size_t j{0};
|
||||
do {
|
||||
buffer[j] = NormalizeIdChar(*ch);
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetCurrentChar();
|
||||
io.HandleRelativePosition(byteLength);
|
||||
ch = io.GetCurrentChar(byteLength);
|
||||
} while (++j < maxLength && ch && IsLegalIdChar(*ch));
|
||||
buffer[j++] = '\0';
|
||||
if (j <= maxLength) {
|
||||
|
@ -107,19 +108,20 @@ static bool GetLowerCaseName(
|
|||
|
||||
static std::optional<SubscriptValue> GetSubscriptValue(IoStatementState &io) {
|
||||
std::optional<SubscriptValue> value;
|
||||
std::optional<char32_t> ch{io.GetCurrentChar()};
|
||||
std::size_t byteCount{0};
|
||||
std::optional<char32_t> ch{io.GetCurrentChar(byteCount)};
|
||||
bool negate{ch && *ch == '-'};
|
||||
if ((ch && *ch == '+') || negate) {
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetCurrentChar();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetCurrentChar(byteCount);
|
||||
}
|
||||
bool overflow{false};
|
||||
while (ch && *ch >= '0' && *ch <= '9') {
|
||||
SubscriptValue was{value.value_or(0)};
|
||||
overflow |= was >= std::numeric_limits<SubscriptValue>::max() / 10;
|
||||
value = 10 * was + *ch - '0';
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetCurrentChar();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetCurrentChar(byteCount);
|
||||
}
|
||||
if (overflow) {
|
||||
io.GetIoErrorHandler().SignalError(
|
||||
|
@ -130,7 +132,7 @@ static std::optional<SubscriptValue> GetSubscriptValue(IoStatementState &io) {
|
|||
if (value) {
|
||||
return -*value;
|
||||
} else {
|
||||
io.HandleRelativePosition(-1); // give back '-' with no digits
|
||||
io.HandleRelativePosition(-byteCount); // give back '-' with no digits
|
||||
}
|
||||
}
|
||||
return value;
|
||||
|
@ -146,7 +148,8 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
|
|||
int j{0};
|
||||
std::size_t contiguousStride{source.ElementBytes()};
|
||||
bool ok{true};
|
||||
std::optional<char32_t> ch{io.GetNextNonBlank()};
|
||||
std::size_t byteCount{0};
|
||||
std::optional<char32_t> ch{io.GetNextNonBlank(byteCount)};
|
||||
char32_t comma{GetComma(io)};
|
||||
for (; ch && *ch != ')'; ++j) {
|
||||
SubscriptValue dimLower{0}, dimUpper{0}, dimStride{0};
|
||||
|
@ -176,11 +179,11 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
|
|||
} else {
|
||||
dimLower = *low;
|
||||
}
|
||||
ch = io.GetNextNonBlank();
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
if (ch && *ch == ':') {
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetNextNonBlank();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
if (auto high{GetSubscriptValue(io)}) {
|
||||
if (*high > dimUpper) {
|
||||
if (ok) {
|
||||
|
@ -194,14 +197,14 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
|
|||
} else {
|
||||
dimUpper = *high;
|
||||
}
|
||||
ch = io.GetNextNonBlank();
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
if (ch && *ch == ':') {
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetNextNonBlank();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
if (auto str{GetSubscriptValue(io)}) {
|
||||
dimStride = *str;
|
||||
ch = io.GetNextNonBlank();
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
}
|
||||
} else { // scalar
|
||||
|
@ -209,8 +212,8 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
|
|||
dimStride = 0;
|
||||
}
|
||||
if (ch && *ch == comma) {
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetNextNonBlank();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
if (ok) {
|
||||
lower[j] = dimLower;
|
||||
|
@ -220,7 +223,7 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
|
|||
}
|
||||
if (ok) {
|
||||
if (ch && *ch == ')') {
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
if (desc.EstablishPointerSection(source, lower, upper, stride)) {
|
||||
return true;
|
||||
} else {
|
||||
|
@ -250,29 +253,30 @@ static bool HandleSubstring(
|
|||
// ambiguous within the parentheses.
|
||||
io.HandleRelativePosition(1); // skip '('
|
||||
std::optional<SubscriptValue> lower, upper;
|
||||
std::optional<char32_t> ch{io.GetNextNonBlank()};
|
||||
std::size_t byteCount{0};
|
||||
std::optional<char32_t> ch{io.GetNextNonBlank(byteCount)};
|
||||
if (ch) {
|
||||
if (*ch == ':') {
|
||||
lower = 1;
|
||||
} else {
|
||||
lower = GetSubscriptValue(io);
|
||||
ch = io.GetNextNonBlank();
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
}
|
||||
if (ch && ch == ':') {
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetNextNonBlank();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
if (ch) {
|
||||
if (*ch == ')') {
|
||||
upper = chars;
|
||||
} else {
|
||||
upper = GetSubscriptValue(io);
|
||||
ch = io.GetNextNonBlank();
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ch && *ch == ')') {
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
if (lower && upper) {
|
||||
if (*lower > *upper) {
|
||||
// An empty substring, whatever the values are
|
||||
|
@ -335,16 +339,17 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc,
|
|||
|
||||
// Advance to the terminal '/' of a namelist group.
|
||||
static void SkipNamelistGroup(IoStatementState &io) {
|
||||
while (auto ch{io.GetNextNonBlank()}) {
|
||||
io.HandleRelativePosition(1);
|
||||
std::size_t byteCount{0};
|
||||
while (auto ch{io.GetNextNonBlank(byteCount)}) {
|
||||
io.HandleRelativePosition(byteCount);
|
||||
if (*ch == '/') {
|
||||
break;
|
||||
} else if (*ch == '\'' || *ch == '"') {
|
||||
// Skip quoted character literal
|
||||
char32_t quote{*ch};
|
||||
while (true) {
|
||||
if ((ch = io.GetCurrentChar())) {
|
||||
io.HandleRelativePosition(1);
|
||||
if ((ch = io.GetCurrentChar(byteCount))) {
|
||||
io.HandleRelativePosition(byteCount);
|
||||
if (*ch == quote) {
|
||||
break;
|
||||
}
|
||||
|
@ -369,14 +374,15 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
|
|||
char name[nameBufferSize];
|
||||
RUNTIME_CHECK(handler, group.groupName != nullptr);
|
||||
char32_t comma{GetComma(io)};
|
||||
std::size_t byteCount{0};
|
||||
while (true) {
|
||||
next = io.GetNextNonBlank();
|
||||
next = io.GetNextNonBlank(byteCount);
|
||||
while (next && *next != '&') {
|
||||
// Extension: comment lines without ! before namelist groups
|
||||
if (!io.AdvanceRecord()) {
|
||||
next.reset();
|
||||
} else {
|
||||
next = io.GetNextNonBlank();
|
||||
next = io.GetNextNonBlank(byteCount);
|
||||
}
|
||||
}
|
||||
if (!next || *next != '&') {
|
||||
|
@ -384,7 +390,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
|
|||
"NAMELIST input group does not begin with '&' (at '%lc')", *next);
|
||||
return false;
|
||||
}
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
if (!GetLowerCaseName(io, name, sizeof name)) {
|
||||
handler.SignalError("NAMELIST input group has no name");
|
||||
return false;
|
||||
|
@ -396,7 +402,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
|
|||
}
|
||||
// Read the group's items
|
||||
while (true) {
|
||||
next = io.GetNextNonBlank();
|
||||
next = io.GetNextNonBlank(byteCount);
|
||||
if (!next || *next == '/') {
|
||||
break;
|
||||
}
|
||||
|
@ -423,7 +429,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
|
|||
const Descriptor *useDescriptor{&itemDescriptor};
|
||||
StaticDescriptor<maxRank, true, 16> staticDesc[2];
|
||||
int whichStaticDesc{0};
|
||||
next = io.GetCurrentChar();
|
||||
next = io.GetCurrentChar(byteCount);
|
||||
bool hadSubscripts{false};
|
||||
bool hadSubstring{false};
|
||||
if (next && (*next == '(' || *next == '%')) {
|
||||
|
@ -456,25 +462,25 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
|
|||
hadSubstring = false;
|
||||
}
|
||||
useDescriptor = &mutableDescriptor;
|
||||
next = io.GetCurrentChar();
|
||||
next = io.GetCurrentChar(byteCount);
|
||||
} while (next && (*next == '(' || *next == '%'));
|
||||
}
|
||||
// Skip the '='
|
||||
next = io.GetNextNonBlank();
|
||||
next = io.GetNextNonBlank(byteCount);
|
||||
if (!next || *next != '=') {
|
||||
handler.SignalError("No '=' found after item '%s' in NAMELIST group '%s'",
|
||||
name, group.groupName);
|
||||
return false;
|
||||
}
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
// Read the values into the descriptor. An array can be short.
|
||||
listInput->ResetForNextNamelistItem();
|
||||
if (!descr::DescriptorIO<Direction::Input>(io, *useDescriptor)) {
|
||||
return false;
|
||||
}
|
||||
next = io.GetNextNonBlank();
|
||||
next = io.GetNextNonBlank(byteCount);
|
||||
if (next && *next == comma) {
|
||||
io.HandleRelativePosition(1);
|
||||
io.HandleRelativePosition(byteCount);
|
||||
}
|
||||
}
|
||||
if (!next || *next != '/') {
|
||||
|
@ -490,13 +496,14 @@ bool IsNamelistName(IoStatementState &io) {
|
|||
if (io.get_if<ListDirectedStatementState<Direction::Input>>()) {
|
||||
if (io.mutableModes().inNamelist) {
|
||||
SavedPosition savedPosition{io};
|
||||
if (auto ch{io.GetNextNonBlank()}) {
|
||||
std::size_t byteCount{0};
|
||||
if (auto ch{io.GetNextNonBlank(byteCount)}) {
|
||||
if (IsLegalIdStart(*ch)) {
|
||||
do {
|
||||
io.HandleRelativePosition(1);
|
||||
ch = io.GetCurrentChar();
|
||||
io.HandleRelativePosition(byteCount);
|
||||
ch = io.GetCurrentChar(byteCount);
|
||||
} while (ch && IsLegalIdChar(*ch));
|
||||
ch = io.GetNextNonBlank();
|
||||
ch = io.GetNextNonBlank(byteCount);
|
||||
// TODO: how to deal with NaN(...) ambiguity?
|
||||
return ch && (*ch == '=' || *ch == '(' || *ch == '%');
|
||||
}
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "unit.h"
|
||||
#include "environment.h"
|
||||
#include "io-error.h"
|
||||
#include "lock.h"
|
||||
#include "unit-map.h"
|
||||
|
@ -233,7 +232,6 @@ UnitMap &ExternalFileUnit::GetUnitMap() {
|
|||
error.isUnformatted = false;
|
||||
errorOutput = &error;
|
||||
|
||||
// TODO: Set UTF-8 mode from the environment
|
||||
unitMap = newUnitMap;
|
||||
return *unitMap;
|
||||
}
|
||||
|
@ -374,18 +372,6 @@ std::size_t ExternalFileUnit::GetNextInputBytes(
|
|||
return p ? length : 0;
|
||||
}
|
||||
|
||||
std::optional<char32_t> ExternalFileUnit::GetCurrentChar(
|
||||
IoErrorHandler &handler) {
|
||||
const char *p{nullptr};
|
||||
std::size_t bytes{GetNextInputBytes(p, handler)};
|
||||
if (bytes == 0) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
// TODO: UTF-8 decoding; may have to get more bytes in a loop
|
||||
return *p;
|
||||
}
|
||||
}
|
||||
|
||||
const char *ExternalFileUnit::FrameNextInput(
|
||||
IoErrorHandler &handler, std::size_t bytes) {
|
||||
RUNTIME_CHECK(handler, isUnformatted.has_value() && !*isUnformatted);
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "buffer.h"
|
||||
#include "connection.h"
|
||||
#include "environment.h"
|
||||
#include "file.h"
|
||||
#include "format.h"
|
||||
#include "io-error.h"
|
||||
|
@ -34,7 +35,9 @@ class ExternalFileUnit : public ConnectionState,
|
|||
public OpenFile,
|
||||
public FileFrame<ExternalFileUnit> {
|
||||
public:
|
||||
explicit ExternalFileUnit(int unitNumber) : unitNumber_{unitNumber} {}
|
||||
explicit ExternalFileUnit(int unitNumber) : unitNumber_{unitNumber} {
|
||||
isUTF8 = executionEnvironment.defaultUTF8;
|
||||
}
|
||||
~ExternalFileUnit() {}
|
||||
|
||||
int unitNumber() const { return unitNumber_; }
|
||||
|
@ -80,7 +83,6 @@ public:
|
|||
const char *, std::size_t, std::size_t elementBytes, IoErrorHandler &);
|
||||
bool Receive(char *, std::size_t, std::size_t elementBytes, IoErrorHandler &);
|
||||
std::size_t GetNextInputBytes(const char *&, IoErrorHandler &);
|
||||
std::optional<char32_t> GetCurrentChar(IoErrorHandler &);
|
||||
void SetLeftTabLimit();
|
||||
bool BeginReadingRecord(IoErrorHandler &);
|
||||
void FinishReadingRecord(IoErrorHandler &);
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
//===-- runtime/utf.cpp ---------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "utf.h"
|
||||
|
||||
namespace Fortran::runtime {
|
||||
|
||||
// clang-format off
|
||||
// Maps the first byte of a UTF-8 sequence to the total number of bytes in
// that sequence.  A zero entry marks a byte value that cannot begin a
// sequence.
const std::uint8_t UTF8FirstByteTable[256]{
  // 0x00 .. 0x7f: a lone byte carrying 7 bits
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70
  // 0x80 .. 0xbf: trailing bytes only; never valid in first position
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0
  // 0xc0 .. 0xdf: two-byte sequence, 11 bits
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
  // 0xe0 .. 0xef: three-byte sequence, 16 bits
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0
  // 0xf0 .. 0xf7: four-byte sequence, 21 bits
  4, 4, 4, 4, 4, 4, 4, 4,
  // 0xf8 .. 0xfb: five-byte sequence, 26 bits
  5, 5, 5, 5,
  // 0xfc .. 0xfd: six-byte sequence, 31 bits
  6, 6,
  // 0xfe: seven-byte sequence, used for values needing all 32 bits
  7,
  // 0xff: never valid
  0
};
|
||||
// clang-format on
|
||||
|
||||
// Non-minimal encodings are accepted.
|
||||
std::optional<char32_t> DecodeUTF8(const char *p0) {
|
||||
const std::uint8_t *p{reinterpret_cast<const std::uint8_t *>(p0)};
|
||||
std::size_t bytes{MeasureUTF8Bytes(*p0)};
|
||||
if (bytes == 1) {
|
||||
return char32_t{*p};
|
||||
} else if (bytes > 1) {
|
||||
std::uint64_t result{char32_t{*p} & (0x7f >> bytes)};
|
||||
for (std::size_t j{1}; j < bytes; ++j) {
|
||||
std::uint8_t next{p[j]};
|
||||
if (next < 0x80 || next > 0xbf) {
|
||||
return std::nullopt;
|
||||
}
|
||||
result = (result << 6) | (next & 0x3f);
|
||||
}
|
||||
if (result <= 0xffffffff) {
|
||||
return static_cast<char32_t>(result);
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Encodes the value `ucs` as UTF-8 into the buffer at `p0` and returns the
// number of bytes written (1 through 7).  The shortest form able to hold
// the value is always used; values above 0x7fffffff exceed the 31-bit
// capacity of the six-byte form and are written in the extended seven-byte
// form with a 0xfe first byte.  Up to seven bytes may be stored, so the
// caller must supply at least that much room.
std::size_t EncodeUTF8(char *p0, char32_t ucs) {
  std::uint8_t *p{reinterpret_cast<std::uint8_t *>(p0)};
  if (ucs <= 0x7f) {
    p[0] = ucs;
    return 1;
  } else if (ucs <= 0x7ff) {
    p[0] = 0xc0 | (ucs >> 6);
    p[1] = 0x80 | (ucs & 0x3f);
    return 2;
  } else if (ucs <= 0xffff) {
    p[0] = 0xe0 | (ucs >> 12);
    p[1] = 0x80 | ((ucs >> 6) & 0x3f);
    p[2] = 0x80 | (ucs & 0x3f);
    return 3;
  } else if (ucs <= 0x1fffff) {
    p[0] = 0xf0 | (ucs >> 18);
    p[1] = 0x80 | ((ucs >> 12) & 0x3f);
    p[2] = 0x80 | ((ucs >> 6) & 0x3f);
    p[3] = 0x80 | (ucs & 0x3f);
    return 4;
  } else if (ucs <= 0x3ffffff) {
    p[0] = 0xf8 | (ucs >> 24);
    p[1] = 0x80 | ((ucs >> 18) & 0x3f);
    p[2] = 0x80 | ((ucs >> 12) & 0x3f);
    p[3] = 0x80 | ((ucs >> 6) & 0x3f);
    p[4] = 0x80 | (ucs & 0x3f);
    return 5;
  } else if (ucs <= 0x7fffffff) {
    // Six-byte form: the first byte is 1111110x and holds payload bit 30.
    p[0] = 0xfc | (ucs >> 30);
    p[1] = 0x80 | ((ucs >> 24) & 0x3f);
    p[2] = 0x80 | ((ucs >> 18) & 0x3f);
    p[3] = 0x80 | ((ucs >> 12) & 0x3f);
    p[4] = 0x80 | ((ucs >> 6) & 0x3f);
    p[5] = 0x80 | (ucs & 0x3f);
    return 6;
  } else {
    // Seven-byte form: the first byte carries no payload bits at all.
    p[0] = 0xfe;
    p[1] = 0x80 | ((ucs >> 30) & 0x3f);
    p[2] = 0x80 | ((ucs >> 24) & 0x3f);
    p[3] = 0x80 | ((ucs >> 18) & 0x3f);
    p[4] = 0x80 | ((ucs >> 12) & 0x3f);
    p[5] = 0x80 | ((ucs >> 6) & 0x3f);
    p[6] = 0x80 | (ucs & 0x3f);
    return 7;
  }
}
|
||||
|
||||
} // namespace Fortran::runtime
|
|
@ -0,0 +1,68 @@
|
|||
//===-- runtime/utf.h -----------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// UTF-8 is the variant-width standard encoding of Unicode (ISO 10646)
|
||||
// code points.
|
||||
//
|
||||
// 7-bit values in [00 .. 7F] represent themselves as single bytes, so true
|
||||
// 7-bit ASCII is also valid UTF-8.
|
||||
//
|
||||
// Larger values are encoded with a start byte in [C0 .. FE] that carries
|
||||
// the length of the encoding and some of the upper bits of the value, followed
|
||||
// by one or more bytes in the range [80 .. BF].
|
||||
//
|
||||
// Specifically, the first byte holds two or more uppermost set bits,
|
||||
// a zero bit, and some payload; the second and later bytes each start with
|
||||
// their uppermost bit set, the next bit clear, and six bits of payload.
|
||||
// Payload parcels are in big-endian order. All bytes must be present in a
|
||||
// valid sequence; i.e., low-order zero bits must be explicit.  UTF-8 is
|
||||
// self-synchronizing on input as no byte value can be both a valid
|
||||
// first byte and a valid trailing byte.
|
||||
//
|
||||
// 0xxxxxxx - 7 bit ASCII
|
||||
// 110xxxxx 10xxxxxx - 11-bit value
|
||||
// 1110xxxx 10xxxxxx 10xxxxxx - 16-bit value
|
||||
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21-bit value
|
||||
// 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26-bit value
|
||||
// 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 31-bit value
|
||||
// 11111110 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 36-bit value
|
||||
//
|
||||
// Canonical UTF-8 sequences should be minimal, and our output is so, but
|
||||
// we do not reject non-minimal sequences on input. Unicode only defines
|
||||
// code points up to 0x10FFFF, so 21-bit (4-byte) UTF-8 is the actual
|
||||
// standard maximum. However, we support extended forms up to 32 bits so that
|
||||
// CHARACTER(KIND=4) can be abused to hold arbitrary 32-bit data.
|
||||
|
||||
#ifndef FORTRAN_RUNTIME_UTF_H_
|
||||
#define FORTRAN_RUNTIME_UTF_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
namespace Fortran::runtime {
|
||||
|
||||
// Derive the length of a UTF-8 character encoding from its first byte.
|
||||
// A zero result signifies an invalid encoding.
|
||||
extern const std::uint8_t UTF8FirstByteTable[256];
|
||||
static inline std::size_t MeasureUTF8Bytes(char first) {
|
||||
return UTF8FirstByteTable[static_cast<std::uint8_t>(first)];
|
||||
}
|
||||
|
||||
static constexpr std::size_t maxUTF8Bytes{7};
|
||||
|
||||
// Ensure that all bytes are present in sequence in the input buffer
|
||||
// before calling; use MeasureUTF8Bytes(first byte) to count them.
|
||||
std::optional<char32_t> DecodeUTF8(const char *);
|
||||
|
||||
// Ensure that at least maxUTF8Bytes remain in the output
|
||||
// buffer before calling.
|
||||
std::size_t EncodeUTF8(char *, char32_t);
|
||||
|
||||
} // namespace Fortran::runtime
|
||||
#endif // FORTRAN_RUNTIME_UTF_H_
|
|
@ -553,6 +553,10 @@ TEST(ExternalIOTests, TestNonAvancingInput) {
|
|||
<< "Input-item value after non advancing read " << j;
|
||||
j++;
|
||||
}
|
||||
// CLOSE(UNIT=unit)
|
||||
io = IONAME(BeginClose)(unit, __FILE__, __LINE__);
|
||||
ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
|
||||
<< "EndIoStatement() for Close";
|
||||
}
|
||||
|
||||
TEST(ExternalIOTests, TestWriteAfterNonAvancingInput) {
|
||||
|
@ -645,9 +649,12 @@ TEST(ExternalIOTests, TestWriteAfterNonAvancingInput) {
|
|||
<< "InputAscii() ";
|
||||
ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
|
||||
<< "EndIoStatement() for Read ";
|
||||
|
||||
ASSERT_EQ(resultRecord, expectedRecord)
|
||||
<< "Record after non advancing read followed by write";
|
||||
// CLOSE(UNIT=unit)
|
||||
io = IONAME(BeginClose)(unit, __FILE__, __LINE__);
|
||||
ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
|
||||
<< "EndIoStatement() for Close";
|
||||
}
|
||||
|
||||
TEST(ExternalIOTests, TestWriteAfterEndfile) {
|
||||
|
@ -707,4 +714,184 @@ TEST(ExternalIOTests, TestWriteAfterEndfile) {
|
|||
ASSERT_FALSE(IONAME(InputInteger)(io, eof)) << "InputInteger(eof)";
|
||||
ASSERT_EQ(eof, -1) << "READ(eof)";
|
||||
ASSERT_EQ(IONAME(EndIoStatement)(io), IostatEnd) << "EndIoStatement for READ";
|
||||
// CLOSE(UNIT=unit)
|
||||
io = IONAME(BeginClose)(unit, __FILE__, __LINE__);
|
||||
ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
|
||||
<< "EndIoStatement() for Close";
|
||||
}
|
||||
|
||||
// Writes a default-kind CHARACTER value containing two non-ASCII bytes
// (0x80 and 0xff) to a unit opened with ENCODING='UTF-8', reads it back
// through the same unit, and then reopens the file without an ENCODING
// specifier and reads it again to inspect the bytes that were stored.
TEST(ExternalIOTests, TestUTF8Encoding) {
  // OPEN(FILE="utf8test",NEWUNIT=unit,ACCESS='SEQUENTIAL',ACTION='READWRITE',&
  //   FORM='FORMATTED',STATUS='REPLACE',ENCODING='UTF-8')
  auto *io{IONAME(BeginOpenNewUnit)(__FILE__, __LINE__)};
  ASSERT_TRUE(IONAME(SetAccess)(io, "SEQUENTIAL", 10))
      << "SetAccess(SEQUENTIAL)";
  ASSERT_TRUE(IONAME(SetAction)(io, "READWRITE", 9)) << "SetAction(READWRITE)";
  ASSERT_TRUE(IONAME(SetFile)(io, "utf8test", 8)) << "SetFile(utf8test)";
  ASSERT_TRUE(IONAME(SetForm)(io, "FORMATTED", 9)) << "SetForm(FORMATTED)";
  ASSERT_TRUE(IONAME(SetStatus)(io, "REPLACE", 7)) << "SetStatus(REPLACE)";
  ASSERT_TRUE(IONAME(SetEncoding)(io, "UTF-8", 5)) << "SetEncoding(UTF-8)";
  int unit{-1};
  ASSERT_TRUE(IONAME(GetNewUnit)(io, unit)) << "GetNewUnit()";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for first OPEN";
  char buffer[12];
  // The literal is split so that "de" is not absorbed into the \xff escape.
  std::memcpy(buffer,
      "abc\x80\xff"
      "de\0\0\0\0\0",
      12);
  // WRITE(unit, *) buffer
  io = IONAME(BeginExternalListOutput)(unit, __FILE__, __LINE__);
  StaticDescriptor<0> staticDescriptor;
  Descriptor &desc{staticDescriptor.descriptor()};
  desc.Establish(TypeCode{CFI_type_char}, 7, buffer, 0);
  desc.Check();
  ASSERT_TRUE(IONAME(OutputDescriptor)(io, desc));
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for WRITE";
  // REWIND(unit)
  io = IONAME(BeginRewind)(unit, __FILE__, __LINE__);
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement for REWIND";
  // READ(unit, *) buffer
  desc.Establish(TypeCode(CFI_type_char), sizeof buffer, buffer, 0);
  desc.Check();
  io = IONAME(BeginExternalListInput)(unit, __FILE__, __LINE__);
  ASSERT_TRUE(IONAME(InputDescriptor)(io, desc));
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for first READ";
  // Seven characters come back; the rest of the 12-character variable is
  // expected to be blank, so the comparison string is padded to 12 bytes.
  ASSERT_EQ(std::memcmp(buffer,
                "abc\x80\xff"
                "de     ",
                12),
      0);
  // CLOSE(UNIT=unit,STATUS='KEEP')
  io = IONAME(BeginClose)(unit, __FILE__, __LINE__);
  ASSERT_TRUE(IONAME(SetStatus)(io, "KEEP", 4)) << "SetStatus(KEEP)";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for first CLOSE";
  // OPEN(FILE="utf8test",NEWUNIT=unit,ACCESS='SEQUENTIAL',ACTION='READWRITE',&
  //   FORM='FORMATTED',STATUS='OLD')
  io = IONAME(BeginOpenNewUnit)(__FILE__, __LINE__);
  ASSERT_TRUE(IONAME(SetAccess)(io, "SEQUENTIAL", 10))
      << "SetAccess(SEQUENTIAL)";
  ASSERT_TRUE(IONAME(SetAction)(io, "READWRITE", 9)) << "SetAction(READWRITE)";
  ASSERT_TRUE(IONAME(SetFile)(io, "utf8test", 8)) << "SetFile(utf8test)";
  ASSERT_TRUE(IONAME(SetForm)(io, "FORMATTED", 9)) << "SetForm(FORMATTED)";
  ASSERT_TRUE(IONAME(SetStatus)(io, "OLD", 3)) << "SetStatus(OLD)";
  ASSERT_TRUE(IONAME(GetNewUnit)(io, unit)) << "GetNewUnit()";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for second OPEN";
  // READ(unit, *) buffer
  io = IONAME(BeginExternalListInput)(unit, __FILE__, __LINE__);
  ASSERT_TRUE(IONAME(InputDescriptor)(io, desc));
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for second READ";
  // 0x80 and 0xff are each expected as two stored bytes here, giving nine
  // bytes of data followed by three blanks.
  ASSERT_EQ(std::memcmp(buffer,
                "abc\xc2\x80\xc3\xbf"
                "de   ",
                12),
      0);
  // CLOSE(UNIT=unit,STATUS='DELETE')
  io = IONAME(BeginClose)(unit, __FILE__, __LINE__);
  ASSERT_TRUE(IONAME(SetStatus)(io, "DELETE", 6)) << "SetStatus(DELETE)";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for second CLOSE";
}
|
||||
|
||||
// Writes a CHARACTER(KIND=4) value containing U+0080 and U+FFFF to a unit
// opened with ENCODING='UTF-8', reads it back as KIND=4 through the same
// unit, and then reopens the file without an ENCODING specifier and reads
// it into a default-kind buffer to inspect the bytes that were stored.
TEST(ExternalIOTests, TestUCS) {
  // OPEN(FILE="ucstest",NEWUNIT=unit,ACCESS='SEQUENTIAL',ACTION='READWRITE',&
  //   FORM='FORMATTED',STATUS='REPLACE',ENCODING='UTF-8')
  auto *io{IONAME(BeginOpenNewUnit)(__FILE__, __LINE__)};
  ASSERT_TRUE(IONAME(SetAccess)(io, "SEQUENTIAL", 10))
      << "SetAccess(SEQUENTIAL)";
  ASSERT_TRUE(IONAME(SetAction)(io, "READWRITE", 9)) << "SetAction(READWRITE)";
  ASSERT_TRUE(IONAME(SetFile)(io, "ucstest", 7)) << "SetFile(ucstest)";
  ASSERT_TRUE(IONAME(SetForm)(io, "FORMATTED", 9)) << "SetForm(FORMATTED)";
  ASSERT_TRUE(IONAME(SetStatus)(io, "REPLACE", 7)) << "SetStatus(REPLACE)";
  ASSERT_TRUE(IONAME(SetEncoding)(io, "UTF-8", 5)) << "SetEncoding(UTF-8)";
  int unit{-1};
  ASSERT_TRUE(IONAME(GetNewUnit)(io, unit)) << "GetNewUnit()";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for first OPEN";
  // The literal is split so that "de" is not absorbed into the \uffff escape.
  char32_t wbuffer[8]{U"abc\u0080\uffff"
                      "de"};
  // WRITE(unit, *) wbuffer
  io = IONAME(BeginExternalListOutput)(unit, __FILE__, __LINE__);
  StaticDescriptor<0> staticDescriptor;
  Descriptor &desc{staticDescriptor.descriptor()};
  // Only the seven characters ahead of the terminating NUL are written.
  desc.Establish(TypeCode{CFI_type_char32_t}, sizeof wbuffer - sizeof(char32_t),
      wbuffer, 0);
  desc.Check();
  ASSERT_TRUE(IONAME(OutputDescriptor)(io, desc));
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for WRITE";
  // REWIND(unit)
  io = IONAME(BeginRewind)(unit, __FILE__, __LINE__);
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement for REWIND";
  // READ(unit, *) wbuffer
  io = IONAME(BeginExternalListInput)(unit, __FILE__, __LINE__);
  desc.Establish(TypeCode{CFI_type_char32_t}, sizeof wbuffer, wbuffer, 0);
  desc.Check();
  ASSERT_TRUE(IONAME(InputDescriptor)(io, desc));
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for first READ";
  // Hex dump of what was read, attached to each expectation below.
  char dump[80];
  dump[0] = '\0';
  for (int j{0}; j < 8; ++j) {
    std::size_t dumpLen{std::strlen(dump)};
    std::snprintf(dump + dumpLen, sizeof dump - dumpLen, " %x",
        static_cast<unsigned>(wbuffer[j]));
  }
  EXPECT_EQ(wbuffer[0], U'a') << dump;
  EXPECT_EQ(wbuffer[1], U'b') << dump;
  EXPECT_EQ(wbuffer[2], U'c') << dump;
  EXPECT_EQ(wbuffer[3], U'\u0080') << dump;
  EXPECT_EQ(wbuffer[4], U'\uffff') << dump;
  EXPECT_EQ(wbuffer[5], U'd') << dump;
  EXPECT_EQ(wbuffer[6], U'e') << dump;
  EXPECT_EQ(wbuffer[7], U' ') << dump;
  // CLOSE(UNIT=unit,STATUS='KEEP')
  io = IONAME(BeginClose)(unit, __FILE__, __LINE__);
  ASSERT_TRUE(IONAME(SetStatus)(io, "KEEP", 4)) << "SetStatus(KEEP)";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for first CLOSE";
  // OPEN(FILE="ucstest",NEWUNIT=unit,ACCESS='SEQUENTIAL',ACTION='READWRITE',&
  //   FORM='FORMATTED',STATUS='OLD')
  io = IONAME(BeginOpenNewUnit)(__FILE__, __LINE__);
  ASSERT_TRUE(IONAME(SetAccess)(io, "SEQUENTIAL", 10))
      << "SetAccess(SEQUENTIAL)";
  ASSERT_TRUE(IONAME(SetAction)(io, "READWRITE", 9)) << "SetAction(READWRITE)";
  ASSERT_TRUE(IONAME(SetFile)(io, "ucstest", 7)) << "SetFile(ucstest)";
  ASSERT_TRUE(IONAME(SetForm)(io, "FORMATTED", 9)) << "SetForm(FORMATTED)";
  ASSERT_TRUE(IONAME(SetStatus)(io, "OLD", 3)) << "SetStatus(OLD)";
  ASSERT_TRUE(IONAME(GetNewUnit)(io, unit)) << "GetNewUnit()";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for second OPEN";
  char buffer[12];
  // READ(unit, *) buffer
  io = IONAME(BeginExternalListInput)(unit, __FILE__, __LINE__);
  desc.Establish(TypeCode{CFI_type_char}, sizeof buffer, buffer, 0);
  desc.Check();
  ASSERT_TRUE(IONAME(InputDescriptor)(io, desc));
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for second READ";
  dump[0] = '\0';
  for (int j{0}; j < 12; ++j) {
    std::size_t dumpLen{std::strlen(dump)};
    std::snprintf(dump + dumpLen, sizeof dump - dumpLen, " %x",
        static_cast<unsigned>(static_cast<unsigned char>(buffer[j])));
  }
  // U+0080 is expected as two stored bytes and U+FFFF as three, giving ten
  // bytes of data followed by two blanks.
  EXPECT_EQ(std::memcmp(buffer,
                "abc\xc2\x80\xef\xbf\xbf"
                "de  ",
                12),
      0)
      << dump;
  // CLOSE(UNIT=unit,STATUS='DELETE')
  io = IONAME(BeginClose)(unit, __FILE__, __LINE__);
  ASSERT_TRUE(IONAME(SetStatus)(io, "DELETE", 6)) << "SetStatus(DELETE)";
  ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
      << "EndIoStatement() for second CLOSE";
}
|
||||
|
|
Loading…
Reference in New Issue