From 5d3c11337a62d7e6834c4bdeacbc5266506dde95 Mon Sep 17 00:00:00 2001 From: Howard Hinnant Date: Mon, 31 May 2010 20:58:54 +0000 Subject: [PATCH] [conversions.string] llvm-svn: 105254 --- libcxx/include/locale | 334 +++++++++++++++++- libcxx/src/locale.cpp | 6 +- .../conversions.string/converted.pass.cpp | 32 ++ .../conversions.string/ctor_codecvt.pass.cpp | 34 ++ .../ctor_codecvt_state.pass.cpp | 28 ++ .../ctor_err_string.pass.cpp | 64 ++++ .../conversions.string/from_bytes.pass.cpp | 37 ++ .../conversions.string/state.pass.cpp | 25 ++ .../conversions.string/to_bytes.pass.cpp | 37 ++ .../conversions.string/types.pass.cpp | 35 ++ 10 files changed, 627 insertions(+), 5 deletions(-) create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp create mode 100644 libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp diff --git a/libcxx/include/locale b/libcxx/include/locale index 09faccd0738f..92a33928ec56 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -81,9 +81,53 @@ template bool isalnum (charT c, const locale& loc); template bool isgraph (charT c, const locale& loc); template charT toupper(charT c, const locale& loc); 
template charT tolower(charT c, const locale& loc); -template class wstring_convert; + +template, + class Byte_alloc = allocator> +class wstring_convert +{ +public: + typedef basic_string, Byte_alloc> byte_string; + typedef basic_string, Wide_alloc> wide_string; + typedef typename Codecvt::state_type state_type; + typedef typename wide_string::traits_type::int_type int_type; + + wstring_convert(Codecvt* pcvt = new Codecvt); + wstring_convert(Codecvt* pcvt, state_type state); + wstring_convert(const byte_string& byte_err, + const wide_string& wide_err = wide_string()); + ~wstring_convert(); + + wide_string from_bytes(char byte); + wide_string from_bytes(const char* ptr); + wide_string from_bytes(const byte_string& str); + wide_string from_bytes(const char* first, const char* last); + + byte_string to_bytes(Elem wchar); + byte_string to_bytes(const Elem* wptr); + byte_string to_bytes(const wide_string& wstr); + byte_string to_bytes(const Elem* first, const Elem* last); + + size_t converted() const; + state_type state() const; +}; + template > - class wbuffer_convert; +class wbuffer_convert + : public basic_streambuf +{ +public: + typedef typename Tr::state_type state_type; + + wbuffer_convert(streambuf* bytebuf = 0, Codecvt* pcvt = new Codecvt, + state_type state = state_type()); + + streambuf* rdbuf() const; + streambuf* rdbuf(streambuf* bytebuf); + + state_type state() const; +}; // 22.4.1 and 22.4.1.3, ctype: class ctype_base; @@ -3496,6 +3540,292 @@ protected: extern template class messages_byname; extern template class messages_byname; +template, + class _Byte_alloc = allocator > +class wstring_convert +{ +public: + typedef basic_string, _Byte_alloc> byte_string; + typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; + typedef typename _Codecvt::state_type state_type; + typedef typename wide_string::traits_type::int_type int_type; +/* State held by each converter. The two error strings are copied from the (byte_err, wide_err) constructor arguments; when non-empty they are returned by to_bytes / from_bytes on failure instead of throwing range_error. __cvtptr_ is the codecvt object used for the conversions and is deleted by the destructor. */ +private: + byte_string __byte_err_string_; + wide_string __wide_err_string_; + _Codecvt* __cvtptr_; + 
state_type __cvtstate_; + size_t __cvtcount_; +/* Copy construction and copy assignment are declared in the private section; no definitions for them appear in this hunk. */ + wstring_convert(const wstring_convert& __wc); + wstring_convert& operator=(const wstring_convert& __wc); +public: + wstring_convert(_Codecvt* __pcvt = new _Codecvt); + wstring_convert(_Codecvt* __pcvt, state_type __state); + wstring_convert(const byte_string& __byte_err, + const wide_string& __wide_err = wide_string()); +#ifdef _LIBCPP_MOVE + wstring_convert(wstring_convert&& __wc); +#endif + ~wstring_convert(); +/* The single-char, C-string and string overloads of from_bytes / to_bytes defined inline below all forward to the (first, last) pointer-pair overload, which is defined out of line. */ + wide_string from_bytes(char __byte) + {return from_bytes(&__byte, &__byte+1);} + wide_string from_bytes(const char* __ptr) + {return from_bytes(__ptr, __ptr + char_traits::length(__ptr));} + wide_string from_bytes(const byte_string& __str) + {return from_bytes(__str.data(), __str.data() + __str.size());} + wide_string from_bytes(const char* __first, const char* __last); + + byte_string to_bytes(_Elem __wchar) + {return to_bytes(&__wchar, &__wchar+1);} + byte_string to_bytes(const _Elem* __wptr) + {return to_bytes(__wptr, __wptr + char_traits<_Elem>::length(__wptr));} + byte_string to_bytes(const wide_string& __wstr) + {return to_bytes(__wstr.data(), __wstr.data() + __wstr.size());} + byte_string to_bytes(const _Elem* __first, const _Elem* __last); +/* converted() reports __cvtcount_, which from_bytes / to_bytes reset to 0 and then advance by the number of source elements consumed; state() returns the stored __cvtstate_. */ + size_t converted() const {return __cvtcount_;} + state_type state() const {return __cvtstate_;} +}; +/* Codecvt-pointer constructors: store the pointer as given (the destructor later deletes it), value-initialize or copy the conversion state, and start with a conversion count of 0. The error strings are left default-constructed. */ +template +inline +wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>:: + wstring_convert(_Codecvt* __pcvt) + : __cvtptr_(__pcvt), __cvtstate_(), __cvtcount_(0) +{ +} + +template +inline +wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>:: + wstring_convert(_Codecvt* __pcvt, state_type __state) + : __cvtptr_(__pcvt), __cvtstate_(__state), __cvtcount_(0) +{ +} +/* Error-string constructor: copies the byte and wide error strings and allocates its own _Codecvt with new in the constructor body. */ +template +wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>:: + wstring_convert(const byte_string& __byte_err, const wide_string& __wide_err) + : __byte_err_string_(__byte_err), __wide_err_string_(__wide_err), + __cvtstate_(), __cvtcount_(0) +{ + __cvtptr_ = new _Codecvt; 
+} + +#ifdef _LIBCPP_MOVE + +template +inline +wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>:: + wstring_convert(wstring_convert&& __wc) + : __byte_err_string_(_STD::move(__wc.__byte_err_string_)), + __wide_err_string_(_STD::move(__wc.__wide_err_string_)), + __cvtptr_(__wc.__cvtptr_), + __cvtstate_(__wc.__cvtstate_), __cvtcount_(__wc.__cvtstate_) +{ + __wc.__cvtptr_ = nullptr; +} + +#endif + +template +wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::~wstring_convert() +{ + delete __cvtptr_; +} + +template +typename wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::wide_string +wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>:: + from_bytes(const char* __frm, const char* __frm_end) +{ + __cvtcount_ = 0; + if (__cvtptr_ != nullptr) + { + wide_string __ws(2*(__frm_end - __frm), _Elem()); + __ws.resize(__ws.capacity()); + codecvt_base::result __r = codecvt_base::ok; + state_type __st = __cvtstate_; + if (__frm != __frm_end) + { + _Elem* __to = &__ws[0]; + _Elem* __to_end = __to + __ws.size(); + const char* __frm_nxt; + do + { + _Elem* __to_nxt; + __r = __cvtptr_->in(__st, __frm, __frm_end, __frm_nxt, + __to, __to_end, __to_nxt); + __cvtcount_ += __frm_nxt - __frm; + if (__frm_nxt == __frm) + { + __r = codecvt_base::error; + } + else if (__r == codecvt_base::noconv) + { + __ws.resize(__to - &__ws[0]); + // This only gets executed if _Elem is char + __ws.append((const _Elem*)__frm, (const _Elem*)__frm_end); + __frm = __frm_nxt; + __r = codecvt_base::ok; + } + else if (__r == codecvt_base::ok) + { + __ws.resize(__to_nxt - &__ws[0]); + __frm = __frm_nxt; + } + else if (__r == codecvt_base::partial) + { + ptrdiff_t __s = __to_nxt - &__ws[0]; + __ws.resize(2 * __s); + __to = &__ws[0] + __s; + __to_end = &__ws[0] + __ws.size(); + __frm = __frm_nxt; + } + } while (__r == codecvt_base::partial && __frm_nxt < __frm_end); + } + if (__r == codecvt_base::ok) + return __ws; + } + if (__wide_err_string_.empty()) + throw 
range_error("wstring_convert: from_bytes error"); + return __wide_err_string_; +} + +template +typename wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>::byte_string +wstring_convert<_Codecvt, _Elem, _Wide_alloc, _Byte_alloc>:: + to_bytes(const _Elem* __frm, const _Elem* __frm_end) +{ + __cvtcount_ = 0; + if (__cvtptr_ != nullptr) + { + byte_string __bs(2*(__frm_end - __frm), char()); + __bs.resize(__bs.capacity()); + codecvt_base::result __r = codecvt_base::ok; + state_type __st = __cvtstate_; + if (__frm != __frm_end) + { + char* __to = &__bs[0]; + char* __to_end = __to + __bs.size(); + const _Elem* __frm_nxt; + do + { + char* __to_nxt; + __r = __cvtptr_->out(__st, __frm, __frm_end, __frm_nxt, + __to, __to_end, __to_nxt); + __cvtcount_ += __frm_nxt - __frm; + if (__frm_nxt == __frm) + { + __r = codecvt_base::error; + } + else if (__r == codecvt_base::noconv) + { + __bs.resize(__to - &__bs[0]); + // This only gets executed if _Elem is char + __bs.append((const char*)__frm, (const char*)__frm_end); + __frm = __frm_nxt; + __r = codecvt_base::ok; + } + else if (__r == codecvt_base::ok) + { + __bs.resize(__to_nxt - &__bs[0]); + __frm = __frm_nxt; + } + else if (__r == codecvt_base::partial) + { + ptrdiff_t __s = __to_nxt - &__bs[0]; + __bs.resize(2 * __s); + __to = &__bs[0] + __s; + __to_end = &__bs[0] + __bs.size(); + __frm = __frm_nxt; + } + } while (__r == codecvt_base::partial && __frm_nxt < __frm_end); + } + if (__r == codecvt_base::ok) + { + size_t __s = __bs.size(); + __bs.resize(__bs.capacity()); + char* __to = &__bs[0] + __s; + char* __to_end = __to + __bs.size(); + do + { + char* __to_nxt; + __r = __cvtptr_->unshift(__st, __to, __to_end, __to_nxt); + if (__r == codecvt_base::noconv) + { + __bs.resize(__to - &__bs[0]); + __r = codecvt_base::ok; + } + else if (__r == codecvt_base::ok) + { + __bs.resize(__to_nxt - &__bs[0]); + } + else if (__r == codecvt_base::partial) + { + ptrdiff_t __s = __to_nxt - &__bs[0]; + __bs.resize(2 * __s); + __to = 
&__bs[0] + __s; + __to_end = &__bs[0] + __bs.size(); + } + } while (__r == codecvt_base::partial); + if (__r == codecvt_base::ok) + return __bs; + } + } + if (__byte_err_string_.empty()) + throw range_error("wstring_convert: to_bytes error"); + return __byte_err_string_; +} + +template > +class wbuffer_convert + : public basic_streambuf<_Elem, _Tr> +{ +public: + // types: + typedef _Elem char_type; + typedef _Tr traits_type; + typedef typename traits_type::int_type int_type; + typedef typename traits_type::pos_type pos_type; + typedef typename traits_type::off_type off_type; + typedef typename _Codecvt::state_type state_type; + +private: + streambuf* __bufptr_; + _Codecvt* __cvtptr_; + state_type __cvtstate_; + +public: + wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt, + state_type __state = state_type()) + : __bufptr_(__bytebuf), __cvtptr_(__pcvt), __cvtstate_(__state) {} + + ~wbuffer_convert() {delete __cvtptr_;} + + streambuf* rdbuf() const {return __bufptr_;} + streambuf* rdbuf(streambuf* __bytebuf) + { + streambuf* __r = __bufptr_; + __bufptr_ = __bytebuf; + return __r; + } + + state_type state() const {return __cvtstate_;} + +protected: + virtual int_type overflow (int_type __c = traits_type::eof()); +}; + +template +typename wbuffer_convert<_Codecvt, _Elem, _Tr>::int_type +wbuffer_convert<_Codecvt, _Elem, _Tr>::overflow(int_type __c) +{ +} + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_LOCALE diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index 6c41abfb70ec..e0d97dc670de 100644 --- a/libcxx/src/locale.cpp +++ b/libcxx/src/locale.cpp @@ -3197,7 +3197,7 @@ __codecvt_utf16::do_encoding() const throw() bool __codecvt_utf16::do_always_noconv() const throw() { - return true; + return false; } int @@ -3347,7 +3347,7 @@ __codecvt_utf16::do_encoding() const throw() bool __codecvt_utf16::do_always_noconv() const throw() { - return true; + return false; } int @@ -3497,7 +3497,7 @@ __codecvt_utf16::do_encoding() const throw() bool 
__codecvt_utf16::do_always_noconv() const throw() { - return true; + return false; } int diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp new file mode 100644 index 000000000000..67525b6ea3bf --- /dev/null +++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// wstring_convert + +// size_t converted() const; + +#include +#include +#include +/* Checks converted(): it is 0 on a freshly constructed converter, 1 and 2 after to_bytes on the two wide literals below, and 4 after from_bytes on a four-byte input. */ +int main() +{ + typedef std::codecvt_utf8 Codecvt; + typedef std::wstring_convert Myconv; + Myconv myconv; + assert(myconv.converted() == 0); + std::string bs = myconv.to_bytes(L"\x40003"); + assert(myconv.converted() == 1); + bs = myconv.to_bytes(L"\x40003\x65"); + assert(myconv.converted() == 2); + std::wstring ws = myconv.from_bytes("\xF1\x80\x80\x83"); + assert(myconv.converted() == 4); +} diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp new file mode 100644 index 000000000000..bdc04d8cc744 --- /dev/null +++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// wstring_convert + +// wstring_convert(Codecvt* pcvt = new Codecvt); + +#include +#include +#include +/* Constructs a converter once with the default argument and once with an explicitly allocated Codecvt, and checks that converted() is 0 in both cases. */ +int main() +{ + { + typedef std::codecvt_utf8 Codecvt; + typedef std::wstring_convert Myconv; + Myconv myconv; + assert(myconv.converted() == 0); + } + { + typedef std::codecvt_utf8 Codecvt; + typedef std::wstring_convert Myconv; + Myconv myconv(new Codecvt); + assert(myconv.converted() == 0); + } +} diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp new file mode 100644 index 000000000000..79bb23d9e206 --- /dev/null +++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// wstring_convert + +// wstring_convert(Codecvt* pcvt, state_type state); + +#include +#include +#include +/* Constructs a converter from an allocated Codecvt and a value-initialized std::mbstate_t, and checks that converted() is 0. */ +int main() +{ + { + typedef std::codecvt_utf8 Codecvt; + typedef std::wstring_convert Myconv; + Myconv myconv(new Codecvt, std::mbstate_t()); + assert(myconv.converted() == 0); + } +} diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp new file mode 100644 index 000000000000..2b798c8d20e1 --- /dev/null +++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// wstring_convert + +// wstring_convert(const byte_string& byte_err, +// const wide_string& wide_err = wide_string()); + +#include +#include +#include +/* Exercises failure reporting in three configurations. With no error strings, both to_bytes and from_bytes on the inputs below must throw std::range_error. With only a byte error string, to_bytes returns that string while from_bytes still throws. With both error strings, each conversion returns its corresponding error string. */ +int main() +{ + typedef std::codecvt_utf8 Codecvt; + typedef std::wstring_convert Myconv; + { + Myconv myconv; + try + { + myconv.to_bytes(L"\xDA83"); + assert(false); + } + catch (const std::range_error&) + { + } + try + { + myconv.from_bytes('\xA5'); + assert(false); + } + catch (const std::range_error&) + { + } + } + { + Myconv myconv("byte error"); + std::string bs = myconv.to_bytes(L"\xDA83"); + assert(bs == "byte error"); + try + { + myconv.from_bytes('\xA5'); + assert(false); + } + catch (const std::range_error&) + { + } + } + { + Myconv myconv("byte error", L"wide error"); + std::string bs = myconv.to_bytes(L"\xDA83"); + assert(bs == "byte error"); + std::wstring ws = myconv.from_bytes('\xA5'); + assert(ws == L"wide error"); + } +} diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp new file mode 100644 index 000000000000..a7eb3c2dc216 --- /dev/null +++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// wstring_convert + +// wide_string from_bytes(char byte); +// wide_string from_bytes(const char* ptr); +// wide_string from_bytes(const byte_string& str); +// wide_string from_bytes(const char* first, const char* last); + +#include +#include +#include +/* Calls each from_bytes overload (single char, C string, std::string, pointer pair) and checks the resulting wide string; the three string-based calls use the same four-byte input and must all yield the same one-character result. */ +int main() +{ + { + std::wstring_convert > myconv; + std::string bs("\xF1\x80\x80\x83"); + std::wstring ws = myconv.from_bytes('a'); + assert(ws == L"a"); + ws = myconv.from_bytes(bs.c_str()); + assert(ws == L"\x40003"); + ws = myconv.from_bytes(bs); + assert(ws == L"\x40003"); + ws = myconv.from_bytes(bs.data(), bs.data() + bs.size()); + assert(ws == L"\x40003"); + } +} diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp new file mode 100644 index 000000000000..56df6609b84f --- /dev/null +++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// wstring_convert + +// state_type state() const; + +#include +#include +/* Only checks that state() can be called on a default-constructed converter and that its result initializes a std::mbstate_t; the value is not inspected. */ +int main() +{ + typedef std::codecvt_utf8 Codecvt; + typedef std::wstring_convert Myconv; + Myconv myconv; + std::mbstate_t s = myconv.state(); +} diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp new file mode 100644 index 000000000000..39600f768206 --- /dev/null +++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// wstring_convert + +// byte_string to_bytes(Elem wchar); +// byte_string to_bytes(const Elem* wptr); +// byte_string to_bytes(const wide_string& wstr); +// byte_string to_bytes(const Elem* first, const Elem* last); + +#include +#include +#include +/* Calls each to_bytes overload (single element, C string, wide string, pointer pair) on the same one-character wide input and checks that every call yields the same four-byte sequence. */ +int main() +{ + { + std::wstring_convert > myconv; + std::wstring ws(1, L'\x40003'); + std::string bs = myconv.to_bytes(ws[0]); + assert(bs == "\xF1\x80\x80\x83"); + bs = myconv.to_bytes(ws.c_str()); + assert(bs == "\xF1\x80\x80\x83"); + bs = myconv.to_bytes(ws); + assert(bs == "\xF1\x80\x80\x83"); + bs = myconv.to_bytes(ws.data(), ws.data() + ws.size()); + assert(bs == "\xF1\x80\x80\x83"); + } +} diff --git a/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp new file mode 100644 index 000000000000..12a55e6a7909 --- /dev/null 
+++ b/libcxx/test/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// + +// template, +// class Byte_alloc = allocator> +// class wstring_convert +// { +// public: +// typedef basic_string, Byte_alloc> byte_string; +// typedef basic_string, Wide_alloc> wide_string; +// typedef typename Codecvt::state_type state_type; +// typedef typename wide_string::traits_type::int_type int_type; + +#include +#include +/* Compile-time only: four static_assert / std::is_same checks on a wstring_convert typedef; main executes no run-time assertions. */ +int main() +{ + { + typedef std::wstring_convert > myconv; + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::int_type>::value), ""); + } +}