From 29f342c6b5085d8d4221a346f59e58198ce6dd33 Mon Sep 17 00:00:00 2001 From: Jonathan Roelofs Date: Wed, 11 Mar 2015 17:00:28 +0000 Subject: [PATCH] Fix ctype_byname<wchar_t>::do_is() mask checking.... again This basically reverts the revert in r216508, and fixes a few more cases while I'm at it. Reading my commit message on that commit again, I think it's bupkis. http://reviews.llvm.org/D8237 llvm-svn: 231940 --- libcxx/include/__locale | 4 ++ libcxx/src/locale.cpp | 68 +++++++++++-------- .../locale.ctype.byname/mask.pass.cpp | 53 +++++++++++++++ 3 files changed, 95 insertions(+), 30 deletions(-) create mode 100644 libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 625d0f85e1d0..19895582cac8 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -353,6 +353,7 @@ public: static const mask punct = _PUNCT; static const mask xdigit = _HEX; static const mask blank = _BLANK; +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) # ifdef __APPLE__ typedef __uint32_t mask; @@ -401,6 +402,9 @@ public: static const mask punct = _P; static const mask xdigit = _X | _N; static const mask blank = _B; +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT #else typedef unsigned long mask; static const mask space = 1<<0; diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index 729022689adf..797bd0570789 100644 --- a/libcxx/src/locale.cpp +++ b/libcxx/src/locale.cpp @@ -1263,16 +1263,16 @@ ctype_byname<wchar_t>::do_is(mask m, char_type c) const #else bool result = false; wint_t ch = static_cast<wint_t>(c); - if (m & space) result |= (iswspace_l(ch, __l) != 0); - if (m & print) result |= (iswprint_l(ch, __l) != 0); - if (m & cntrl) result |= (iswcntrl_l(ch, __l) != 0); - if (m & upper) result
|= (iswupper_l(ch, __l) != 0); - if (m & lower) result |= (iswlower_l(ch, __l) != 0); - if (m & alpha) result |= (iswalpha_l(ch, __l) != 0); - if (m & digit) result |= (iswdigit_l(ch, __l) != 0); - if (m & punct) result |= (iswpunct_l(ch, __l) != 0); - if (m & xdigit) result |= (iswxdigit_l(ch, __l) != 0); - if (m & blank) result |= (iswblank_l(ch, __l) != 0); + if ((m & space) == space) result |= (iswspace_l(ch, __l) != 0); + if ((m & print) == print) result |= (iswprint_l(ch, __l) != 0); + if ((m & cntrl) == cntrl) result |= (iswcntrl_l(ch, __l) != 0); + if ((m & upper) == upper) result |= (iswupper_l(ch, __l) != 0); + if ((m & lower) == lower) result |= (iswlower_l(ch, __l) != 0); + if ((m & alpha) == alpha) result |= (iswalpha_l(ch, __l) != 0); + if ((m & digit) == digit) result |= (iswdigit_l(ch, __l) != 0); + if ((m & punct) == punct) result |= (iswpunct_l(ch, __l) != 0); + if ((m & xdigit) == xdigit) result |= (iswxdigit_l(ch, __l) != 0); + if ((m & blank) == blank) result |= (iswblank_l(ch, __l) != 0); return result; #endif } @@ -1290,22 +1290,30 @@ ctype_byname<wchar_t>::do_is(const char_type* low, const char_type* high, mask* wint_t ch = static_cast<wint_t>(*low); if (iswspace_l(ch, __l)) *vec |= space; +#ifndef _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT if (iswprint_l(ch, __l)) *vec |= print; +#endif if (iswcntrl_l(ch, __l)) *vec |= cntrl; if (iswupper_l(ch, __l)) *vec |= upper; if (iswlower_l(ch, __l)) *vec |= lower; +#ifndef _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA if (iswalpha_l(ch, __l)) *vec |= alpha; +#endif if (iswdigit_l(ch, __l)) *vec |= digit; if (iswpunct_l(ch, __l)) *vec |= punct; +#ifndef _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT if (iswxdigit_l(ch, __l)) *vec |= xdigit; +#endif + if (iswblank_l(ch, __l)) + *vec |= blank; } } return low; @@ -1321,16 +1329,16 @@ ctype_byname<wchar_t>::do_scan_is(mask m, const char_type* low, const char_type* break; #else wint_t ch = static_cast<wint_t>(*low); - if (m & space && iswspace_l(ch, __l)) break; - if (m & print && iswprint_l(ch, __l)) break; -
if (m & cntrl && iswcntrl_l(ch, __l)) break; - if (m & upper && iswupper_l(ch, __l)) break; - if (m & lower && iswlower_l(ch, __l)) break; - if (m & alpha && iswalpha_l(ch, __l)) break; - if (m & digit && iswdigit_l(ch, __l)) break; - if (m & punct && iswpunct_l(ch, __l)) break; - if (m & xdigit && iswxdigit_l(ch, __l)) break; - if (m & blank && iswblank_l(ch, __l)) break; + if ((m & space) == space && iswspace_l(ch, __l)) break; + if ((m & print) == print && iswprint_l(ch, __l)) break; + if ((m & cntrl) == cntrl && iswcntrl_l(ch, __l)) break; + if ((m & upper) == upper && iswupper_l(ch, __l)) break; + if ((m & lower) == lower && iswlower_l(ch, __l)) break; + if ((m & alpha) == alpha && iswalpha_l(ch, __l)) break; + if ((m & digit) == digit && iswdigit_l(ch, __l)) break; + if ((m & punct) == punct && iswpunct_l(ch, __l)) break; + if ((m & xdigit) == xdigit && iswxdigit_l(ch, __l)) break; + if ((m & blank) == blank && iswblank_l(ch, __l)) break; #endif } return low; @@ -1346,16 +1354,16 @@ ctype_byname<wchar_t>::do_scan_not(mask m, const char_type* low, const char_type break; #else wint_t ch = static_cast<wint_t>(*low); - if (m & space && iswspace_l(ch, __l)) continue; - if (m & print && iswprint_l(ch, __l)) continue; - if (m & cntrl && iswcntrl_l(ch, __l)) continue; - if (m & upper && iswupper_l(ch, __l)) continue; - if (m & lower && iswlower_l(ch, __l)) continue; - if (m & alpha && iswalpha_l(ch, __l)) continue; - if (m & digit && iswdigit_l(ch, __l)) continue; - if (m & punct && iswpunct_l(ch, __l)) continue; - if (m & xdigit && iswxdigit_l(ch, __l)) continue; - if (m & blank && iswblank_l(ch, __l)) continue; + if ((m & space) == space && iswspace_l(ch, __l)) continue; + if ((m & print) == print && iswprint_l(ch, __l)) continue; + if ((m & cntrl) == cntrl && iswcntrl_l(ch, __l)) continue; + if ((m & upper) == upper && iswupper_l(ch, __l)) continue; + if ((m & lower) == lower && iswlower_l(ch, __l)) continue; + if ((m & alpha) == alpha && iswalpha_l(ch, __l)) continue; + if ((m &
digit) == digit && iswdigit_l(ch, __l)) continue; + if ((m & punct) == punct && iswpunct_l(ch, __l)) continue; + if ((m & xdigit) == xdigit && iswxdigit_l(ch, __l)) continue; + if ((m & blank) == blank && iswblank_l(ch, __l)) continue; break; #endif } diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp new file mode 100644 index 000000000000..a09072a98788 --- /dev/null +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// <locale> + +// template <class charT> class ctype_byname; + +// bool is(mask m, charT c) const; + +#include <locale> +#include <type_traits> +#include <cassert> + +int main() +{ + { + std::locale l("C"); + { + typedef std::ctype<wchar_t> WF; + const WF& wf = std::use_facet<WF>(l); + typedef std::ctype<char> CF; + const CF& cf = std::use_facet<CF>(l); + + // The ctype masks in Newlib don't form a proper bitmask because + // the mask is only 8 bits wide, and there are more than 8 mask + // kinds. This means that the mask for alpha is (_U | _L), which + // is tricky to match in the do_is implementation because in + // [22.4.1.1.2 2] the standard specifies that the match code behaves + // like (m & M) != 0, but following this exactly would give false + // positives for characters that are both 'upper' and 'alpha', but + // not 'lower', for example.
+ assert( wf.is(WF::upper, L'A')); + assert( cf.is(CF::upper, 'A')); + assert(!wf.is(WF::lower, L'A')); + assert(!cf.is(CF::lower, 'A')); + assert( wf.is(WF::alpha, L'A')); + assert( cf.is(CF::alpha, 'A')); + + assert(!wf.is(WF::upper, L'a')); + assert(!cf.is(CF::upper, 'a')); + assert( wf.is(WF::lower, L'a')); + assert( cf.is(CF::lower, 'a')); + assert( wf.is(WF::alpha, L'a')); + assert( cf.is(CF::alpha, 'a')); + } + } +}