[flang] Fix UTF-8 bugs and add related tests

Original-commit: flang-compiler/f18@9dd19ede9e
Reviewed-on: https://github.com/flang-compiler/f18/pull/471
Tree-same-pre-rewrite: false
This commit is contained in:
Jean Perier 2019-05-28 02:15:05 -07:00
parent 958e4e691e
commit bc30bef24b
4 changed files with 46 additions and 3 deletions

View File

@ -88,8 +88,15 @@ std::string QuoteCharacterLiteralHelper(
std::string result{'"'};
const auto emit{[&](char ch) { result += ch; }};
for (auto ch : str) {
char32_t ch32{static_cast<unsigned char>(ch)};
EmitQuotedChar(ch32, emit, emit, doubleDoubleQuotes, doubleBackslash);
using CharT = std::decay_t<decltype(ch)>;
if constexpr (std::is_same_v<char, CharT>) {
// char may be signed depending on host.
char32_t ch32{static_cast<unsigned char>(ch)};
EmitQuotedChar(ch32, emit, emit, doubleDoubleQuotes, doubleBackslash);
} else {
char32_t ch32{ch};
EmitQuotedChar(ch32, emit, emit, doubleDoubleQuotes, doubleBackslash);
}
}
result += '"';
return result;
@ -136,6 +143,7 @@ std::optional<std::u32string> DecodeUTF8(const std::string &s) {
return std::nullopt; // not valid UTF-8
}
}
result.append(1, ch);
bytes -= charBytes;
}
return {result};

View File

@ -545,7 +545,7 @@ void Prescanner::QuotedCharacterLiteral(
bool escape{false};
bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
while (true) {
char ch{*at_};
unsigned char ch{static_cast<unsigned char>(*at_)};
escape = !escape && ch == '\\' && escapesEnabled;
EmitQuotedChar(ch, emit, insert, false, !escapesEnabled);
while (PadOutCharacterLiteral(tokens)) {

View File

@ -175,6 +175,7 @@ set(MODFILE_TESTS
modfile25.f90
modfile26.f90
modfile27.f90
modfile28.f90
)
set(LABEL_TESTS

View File

@ -0,0 +1,34 @@
! Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
!
! Licensed under the Apache License, Version 2.0 (the "License");
! you may not use this file except in compliance with the License.
! You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing, software
! distributed under the License is distributed on an "AS IS" BASIS,
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
! See the License for the specific language governing permissions and
! limitations under the License.
! Test UTF-8 support in character literals
! TODO: test EUC-JP
! Module m declares named constants whose character literals contain
! non-ASCII text, in CHARACTER kinds 1 and 4, together with their lengths.
! The expected module file for these declarations follows the module.
! NOTE(review): len=: and the (:) array spec on PARAMETER entities look
! nonstandard (deferred length/shape normally requires POINTER or ALLOCATABLE;
! len=* and (*) are the usual forms for named constants) - confirm before
! changing, because the expected output below is written for these spellings.
module m
! Kind-4 scalar constant; the expected output keeps the literal text as written.
character(kind=4,len=:), parameter :: c4 = 4_"Hi! 你好!"
! Kind-1 constant built from the same text; the expected output shows the two
! non-ASCII characters as octal escapes, three bytes each.
character(kind=1,len=:), parameter :: c1 = 1_"Hi! 你好!"
! Kind-4 array constant made of five single-character literals.
character(kind=4,len=:), parameter :: c4a(:) = [4_"一", 4_"二", 4_"三", 4_"四", 4_"五"]
! Lengths of the two scalars; the expected output records 7 for c4 and 11 for c1.
integer, parameter :: lc4 = len(c4)
integer, parameter :: lc1 = len(c1)
end module m
!Expect: m.mod
!module m
!character(:,4),parameter::c4=4_"Hi! 你好!"
!character(:,1),parameter::c1=1_"Hi! \344\275\240\345\245\275!"
!character(:,4),parameter::c4a(1_8:)=[CHARACTER(KIND=4,LEN=1)::"一","二","三","四","五"]
!integer(4),parameter::lc4=7_4
!integer(4),parameter::lc1=11_4
!end