[libcxx] [test] Use proper UTF-8 locales on Windows

Since Windows 10 version 1803 (10.0.17134.0) (or Windows Server 2019),
the Windows Universal C Runtime (UCRT) actually does support UTF-8
locales - they're available under the same names that are commonly used
on Unices, e.g. "en_US.UTF-8".

The UTF-8 locale support unfortunately has a bug which breaks a couple of
tests that were passing previously. That bug is fixed in the very
latest version of the UCRT (in UCRT 10.0.20348.0, available in Windows
11 or Windows Server 2022), so it will get resolved eventually,
provided that the CI environment does get upgraded to a
newer version of Windows Server.

While the net number of xfailed/passing tests in this patch is a loss,
this does allow fixing a lot more locale tests properly for Windows
in later patches.

Intentionally not touching the ISO-8859-1/2 locales used for testing;
they're not detected and tested/used right now, and fixing that up
is another project.

Differential Revision: https://reviews.llvm.org/D119930
This commit is contained in:
Martin Storsjö 2022-01-25 13:58:43 +00:00
parent e217ebcc96
commit 38d25aecdf
13 changed files with 69 additions and 35 deletions

View File

@ -216,6 +216,28 @@ class TestProgramOutput(SetupConfigs):
self.assertEqual(dsl.programOutput(self.config, source), "STDOUT-OUTPUT")
class TestProgramSucceeds(SetupConfigs):
  """
  Unit tests covering libcxx.test.dsl.programSucceeds
  """
  def test_success(self):
    # A program whose main() returns 0 must be reported as succeeding.
    program = "\nint main(int, char**) { return 0; }\n"
    self.assertTrue(dsl.programSucceeds(self.config, program))

  def test_failure(self):
    # A program whose main() returns 1 must be reported as not succeeding.
    program = "\nint main(int, char**) { return 1; }\n"
    self.assertFalse(dsl.programSucceeds(self.config, program))

  def test_compile_failure(self):
    # Source that does not compile is expected to raise rather than to be
    # reported as an unsuccessful run.
    program = "\nthis does not compile\n"
    with self.assertRaises(dsl.ConfigurationCompilationError):
      dsl.programSucceeds(self.config, program)
class TestHasLocale(SetupConfigs):
"""
Tests for libcxx.test.dsl.hasLocale

View File

@ -8,8 +8,6 @@
// REQUIRES: locale.en_US.UTF-8
// XFAIL: LIBCXX-WINDOWS-FIXME
// <fstream>
// int_type overflow(int_type c = traits::eof());

View File

@ -9,8 +9,6 @@
// REQUIRES: locale.en_US.UTF-8
// FILE_DEPENDENCIES: underflow.dat, underflow_utf8.dat
// XFAIL: LIBCXX-WINDOWS-FIXME
// <fstream>
// int_type underflow();

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
// REQUIRES: locale.en_US.UTF-8
// XFAIL: broken-utf8-wchar-ctype
// <locale>

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
// REQUIRES: locale.en_US.UTF-8
// XFAIL: broken-utf8-wchar-ctype
// <locale>

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
// REQUIRES: locale.en_US.UTF-8
// XFAIL: broken-utf8-wchar-ctype
// <locale>

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
// REQUIRES: locale.en_US.UTF-8
// XFAIL: broken-utf8-wchar-ctype
// <locale>

View File

@ -10,8 +10,6 @@
// REQUIRES: locale.fr_FR.UTF-8
// REQUIRES: locale.zh_CN.UTF-8
// XFAIL: LIBCXX-WINDOWS-FIXME
// <locale>
// class time_get_byname<charT, InputIterator>

View File

@ -11,8 +11,6 @@
// REQUIRES: locale.ru_RU.UTF-8
// REQUIRES: locale.zh_CN.UTF-8
// XFAIL: LIBCXX-WINDOWS-FIXME
// <locale>
// class time_get_byname<charT, InputIterator>

View File

@ -13,6 +13,7 @@
// charT translate_nocase(charT c) const;
// REQUIRES: locale.en_US.UTF-8
// XFAIL: broken-utf8-wchar-ctype
#include <regex>
#include <cassert>

View File

@ -15,30 +15,21 @@
#define PLATFORM_SUPPORT_H
// locale names
#ifdef _WIN32
// WARNING: Windows does not support UTF-8 codepages.
// Locales are "converted" using https://docs.moodle.org/dev/Table_of_locales
# define LOCALE_en_US "en-US"
# define LOCALE_en_US_UTF_8 "en-US"
# define LOCALE_cs_CZ_ISO8859_2 "cs-CZ"
# define LOCALE_fr_FR_UTF_8 "fr-FR"
# define LOCALE_fr_CA_ISO8859_1 "fr-CA"
# define LOCALE_ru_RU_UTF_8 "ru-RU"
# define LOCALE_zh_CN_UTF_8 "zh-CN"
#define LOCALE_en_US "en_US"
#define LOCALE_en_US_UTF_8 "en_US.UTF-8"
#define LOCALE_fr_FR_UTF_8 "fr_FR.UTF-8"
#ifdef __linux__
# define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1"
# define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2"
#elif defined(_WIN32)
# define LOCALE_fr_CA_ISO8859_1 "fr-CA"
# define LOCALE_cs_CZ_ISO8859_2 "cs-CZ"
#else
# define LOCALE_en_US "en_US"
# define LOCALE_en_US_UTF_8 "en_US.UTF-8"
# define LOCALE_fr_FR_UTF_8 "fr_FR.UTF-8"
# ifdef __linux__
# define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1"
# define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2"
# else
# define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO8859-1"
# define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO8859-2"
# endif
# define LOCALE_ru_RU_UTF_8 "ru_RU.UTF-8"
# define LOCALE_zh_CN_UTF_8 "zh_CN.UTF-8"
# define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO8859-1"
# define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO8859-2"
#endif
#define LOCALE_ru_RU_UTF_8 "ru_RU.UTF-8"
#define LOCALE_zh_CN_UTF_8 "zh_CN.UTF-8"
#include <stdio.h>
#include <stdlib.h>

View File

@ -175,6 +175,22 @@ def programOutput(config, program, args=None):
actualOut = actualOut.group(1) if actualOut else ""
return actualOut
@_memoizeExpensiveOperation(lambda c, p, args=None: (c.substitutions, c.environment, p, args))
def programSucceeds(config, program, args=None):
  """
  Build the given program for the test target, execute it there, and report
  whether the run completed successfully.

  Returns True when programOutput() finishes without raising
  ConfigurationRuntimeError, and False when it raises that exception. Any
  other exception raised by programOutput() is not caught here.

  The program is executed through the %{exec} substitution, so depending on
  what %{exec} does, it may run on a remote host.
  """
  try:
    # Only the fact that the run completed matters; the output is discarded.
    programOutput(config, program, args)
  except ConfigurationRuntimeError:
    ran_ok = False
  else:
    ran_ok = True
  return ran_ok
@_memoizeExpensiveOperation(lambda c, f: (c.substitutions, c.environment, f))
def hasCompileFlag(config, flag):
"""
@ -229,11 +245,7 @@ def hasAnyLocale(config, locales):
}
#endif
"""
try:
programOutput(config, program, args=[pipes.quote(l) for l in locales])
except ConfigurationRuntimeError:
return False
return True
return programSucceeds(config, program, args=[pipes.quote(l) for l in locales])
@_memoizeExpensiveOperation(lambda c, flags='': (c.substitutions, c.environment, flags))
def compilerMacros(config, flags=''):

View File

@ -73,6 +73,18 @@ DEFAULT_FEATURES = [
void f() { new int(3); }
""", ['-shared'])),
# Check for a Windows UCRT bug (fixed in UCRT/Windows 10.0.20348.0):
# https://developercommunity.visualstudio.com/t/utf-8-locales-break-ctype-functions-for-wchar-type/1653678
#
# The feature is enabled when the compiler defines _WIN32 and programSucceeds()
# returns False for the probe program below. The probe switches to the
# "en_US.UTF-8" locale and exits with a nonzero status unless towlower() maps
# the wide character 0xDA to 0xFA.
# The backslashes are doubled because this is a regular (non-raw) Python
# string: the compiled program sees the C escapes L'\xDA' and L'\xFA'.
# NOTE(review): the return value of setlocale() is not checked, so a target
# where that locale cannot be set presumably also ends up with this feature --
# confirm that this is acceptable.
Feature(name='broken-utf8-wchar-ctype',
when=lambda cfg: '_WIN32' in compilerMacros(cfg) and not programSucceeds(cfg, """
#include <locale.h>
#include <wctype.h>
int main(int, char**) {
setlocale(LC_ALL, "en_US.UTF-8");
return towlower(L'\\xDA') != L'\\xFA';
}
""")),
# Whether Bash can run on the executor.
# This is not always the case, for example when running on embedded systems.
#