[flang] Remove encoding related checks in achar/char

Original-commit: flang-compiler/f18@86bc6a0252
Reviewed-on: https://github.com/flang-compiler/f18/pull/471
Tree-same-pre-rewrite: false
This commit is contained in:
Jean Perier 2019-06-05 06:41:03 -07:00
parent 6a1b208a4d
commit 6dd3ca917a
3 changed files with 20 additions and 51 deletions

View File

@ -18,16 +18,10 @@
#include "type.h"
#include <string>
// character(1) is ISO/IEC 646:1991 (ASCII)
//
// character(2) is a variant of EUC-JP: The code points are the same as EUC-JP,
// but the encoding has a two bytes fix-size (EUC-JP encoding has a variable
// length). The one byte EUC-JP character representations are simply
// zero-extended to two byte representations. The three bytes character
// representation of EUC-JP (JIS X 0212) are not supported in this internal
// encoding.
//
// character(4) is ISO/IEC 10646 UCS-4 (~ UTF-32)
// Provides implementations of intrinsic functions operating on character
// scalars. No assumption is made about character encodings other than that
// they must be compatible with ASCII (otherwise NEW_LINE, ACHAR and IACHAR
// would need to be adapted).
namespace Fortran::evaluate {
@ -36,27 +30,23 @@ template<int KIND> class CharacterUtils {
using CharT = typename Character::value_type;
public:
// Reports whether `code` is an acceptable character code for this KIND.
// KIND 1 is checked with IsValidASCII, KIND 2 with IsValidInternalEUC_JP and
// KIND 4 with IsValidUCS4; instantiating this for any other KIND is rejected
// at compile time with the message "bad character kind".
static constexpr bool IsValidCharacterCode(std::uint64_t code) {
  static_assert(KIND == 1 || KIND == 2 || KIND == 4, "bad character kind");
  if constexpr (KIND == 1) {
    return IsValidASCII(code);
  } else if constexpr (KIND == 2) {
    return IsValidInternalEUC_JP(code);
  } else {
    return IsValidUCS4(code);
  }
}
// Builds a Character holding exactly one element: `code` converted to CharT
// with a static_cast. No validity check is applied to `code` here.
// CHAR doubles as ACHAR, on the assumption that the character encoding
// contains ASCII.
static Character CHAR(std::uint64_t code) {
  const CharT unit{static_cast<CharT>(code)};
  return Character{{unit}};
}
// ICHAR also implements IACHAR
static int ICHAR(const Character &c) {
// ICHAR also implements IACHAR under assumption that character encodings
// contain ASCII
static std::int64_t ICHAR(const Character &c) {
CHECK(c.length() == 1);
return c[0];
if constexpr (std::is_same_v<CharT, char>) {
// char may be signed, so cast it first to unsigned to avoid having
// ichar(char(128_4)) returning -128
return static_cast<unsigned char>(c[0]);
} else {
return c[0];
}
}
// Returns a Character holding exactly one element: the value of NewLine().
static Character NEW_LINE() {
  const CharT newline{NewLine()};
  return Character{{newline}};
}
@ -81,18 +71,7 @@ public:
}
private:
// Reports whether `code` lies in the 7-bit range [0, 0x7f].
static constexpr bool IsValidASCII(std::uint64_t code) {
  return code <= 0x7f;
}
// Reports whether `code` is accepted for the two-byte internal EUC-JP variant.
// A code is accepted when IsValidASCII accepts it, or when it is below 0xffff
// and either
//  - its high byte and its low byte both lie in [0xa1, 0xfe], or
//  - its high byte is 0x8e and its low byte lies in [0xa1, 0xdf].
static constexpr bool IsValidInternalEUC_JP(std::uint64_t code) {
  if (IsValidASCII(code)) {
    return true;
  }
  if (code >= 0xffff) {
    return false;
  }
  const std::uint16_t highByte{static_cast<std::uint16_t>(code >> 8)};
  const std::uint16_t lowByte{static_cast<std::uint16_t>(code & 0xff)};
  const bool bothInA1ToFE{0xa1 <= highByte && highByte <= 0xfe &&
      0xa1 <= lowByte && lowByte <= 0xfe};
  const bool prefixed8E{highByte == 0x8e && 0xa1 <= lowByte && lowByte <= 0xdf};
  return bothInA1ToFE || prefixed8E;
}
// Reports whether `code` is a valid UCS-4 code point: at most 0x10ffff and
// outside the surrogate range [0xd800, 0xdfff]. The upper surrogate bound is
// 0xdfff (not 0xdc00), so low surrogates 0xdc01..0xdfff are rejected too.
static constexpr bool IsValidUCS4(std::uint64_t code) {
  return code < 0xd800 || (0xdfff < code && code <= 0x10ffff);
}
// Following helpers assume that character encodings contain ASCII
// Code unit 0x20, the ASCII blank; valid only for ASCII-compatible encodings.
static constexpr CharT Space() {
  return static_cast<CharT>(0x20);
}
// Code unit 0x0a, the ASCII line feed; valid only for ASCII-compatible
// encodings.
static constexpr CharT NewLine() {
  return static_cast<CharT>(0x0a);
}
};

View File

@ -911,24 +911,14 @@ Expr<Type<TypeCategory::Character, KIND>> FoldOperation(FoldingContext &context,
if (auto *intrinsic{std::get_if<SpecificIntrinsic>(&funcRef.proc().u)}) {
std::string name{intrinsic->name};
if (name == "achar" || name == "char") {
const auto validate{name == "achar"
? &CharacterUtils<1>::IsValidCharacterCode
: &CharacterUtils<KIND>::IsValidCharacterCode};
auto *sn{UnwrapArgument<SomeInteger>(args[0])};
CHECK(sn != nullptr);
return std::visit(
[&funcRef, &context, &name, &validate](const auto &n) -> Expr<T> {
[&funcRef, &context, &name](const auto &n) -> Expr<T> {
using IntT = typename std::decay_t<decltype(n)>::Result;
return FoldElementalIntrinsic<T, IntT>(context, std::move(funcRef),
ScalarFunc<T, IntT>([&context, &name, &validate](
const Scalar<IntT> &i) {
std::uint64_t code{i.ToUInt64()};
if (!validate(code)) {
context.messages().Say(
"Character code %lld is invalid for CHARACTER(%d) type in %s intrinsic function"_en_US,
code, KIND, name);
}
return CharacterUtils<KIND>::CHAR(code);
ScalarFunc<T, IntT>([&context, &name](const Scalar<IntT> &i) {
return CharacterUtils<KIND>::CHAR(i.ToUInt64());
}));
},
sn->u);

Binary file not shown.