|
Boost-Commit : |
Subject: [Boost-commit] svn:boost r64531 - in sandbox/SOC/2009/unicode: boost/iterator boost/unicode libs/unicode/build libs/unicode/doc libs/unicode/test libs/unicode/test/iterator libs/unicode/test/unicode
From: loufoque_at_[hidden]
Date: 2010-08-01 17:15:09
Author: mgaunard
Date: 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
New Revision: 64531
URL: http://svn.boost.org/trac/boost/changeset/64531
Log:
feature: create converters from codecvt + locale encode/decode
Added:
sandbox/SOC/2009/unicode/boost/iterator/codecvt_converter.hpp (contents, props changed)
sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp
- copied, changed from r64500, /sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp
sandbox/SOC/2009/unicode/boost/unicode/codecvt.hpp (contents, props changed)
sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_locale.cpp (contents, props changed)
Removed:
sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp
Text files modified:
sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp | 7 ++
sandbox/SOC/2009/unicode/boost/unicode/utf.hpp | 6 ++
sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp | 96 ++++++++++++++++++++++++++++++++++++++-
sandbox/SOC/2009/unicode/libs/unicode/build/Jamfile.v2 | 2
sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2 | 2
sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk | 7 ++
sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2 | 1
sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp | 27 +----------
8 files changed, 116 insertions(+), 32 deletions(-)
Added: sandbox/SOC/2009/unicode/boost/iterator/codecvt_converter.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2009/unicode/boost/iterator/codecvt_converter.hpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -0,0 +1,151 @@
+#ifndef BOOST_ITERATOR_CODECVT_CONVERTER_HPP
+#define BOOST_ITERATOR_CODECVT_CONVERTER_HPP
+
+#include <algorithm>
+#include <stdexcept>
+#include <locale>
+
+#include <boost/mpl/int.hpp>
+
+namespace boost
+{
+
+namespace detail
+{
+
+/** Adapts one conversion member function of a \c std::codecvt facet
+ * (\c in or \c out) to the converter interface: each call to \c ltr feeds
+ * input units to the facet until it has produced at least one output unit.
+ *
+ * \tparam Input   type of the units read from the input range
+ * \tparam Output  type of the units written to the output iterator
+ * \tparam Codecvt facet type whose member function is invoked */
+template<typename Input, typename Output, typename Codecvt>
+struct codecvt_converter
+{
+    typedef Input input_type;
+    typedef Output output_type;
+
+    // arbitrary max buffer size; it also bounds how many input units a
+    // single call to ltr may buffer
+    typedef mpl::int_<64> max_output;
+
+private:
+    typedef std::codecvt_base::result (Codecvt::*CvtFunction)(
+        std::mbstate_t&,
+        const Input*, const Input*, const Input*&,
+        Output*, Output*, Output*&
+    ) const;
+
+public:
+    /** \param codecvt_ facet to convert with; only a reference is stored,
+     *                  so the facet must outlive this object
+     *  \param f_       member function of the facet performing the conversion */
+    codecvt_converter(const Codecvt& codecvt_, const CvtFunction& f_) : codecvt(codecvt_), f(f_)
+    {
+    }
+
+    /** Converts the smallest prefix of <tt>[begin, end)</tt> for which the
+     * facet produces output, and copies that output to \c out.
+     *
+     * \pre <tt>begin != end</tt>
+     * \param begin advanced past every input unit that was read
+     * \return the output iterator past the last unit written
+     * \throws std::out_of_range if the facet reports an error, if the input
+     *         ends before any output was produced, or if more than
+     *         \c max_output::value input units would have to be buffered */
+    template<typename In, typename Out>
+    Out ltr(In& begin, In end, Out out)
+    {
+        // value-initialization yields the initial conversion state
+        std::mbstate_t state = std::mbstate_t();
+
+        Input buffer_in [max_output::value];
+        Output buffer_out[max_output::value];
+
+        Input* const in_limit = buffer_in + max_output::value;
+        Output* const out_limit = buffer_out + max_output::value;
+
+        Input* in_end = buffer_in;          // one past the last buffered input unit
+        const Input* from_next = buffer_in; // first unit not yet consumed by the facet
+        Output* to_next = buffer_out;       // one past the last output unit produced
+
+        *in_end++ = *begin++;
+        for(;;) // until we have some output
+        {
+            std::codecvt_base::result result = (codecvt.*f)(
+                state,
+                from_next, in_end, from_next,
+                to_next, out_limit, to_next
+            );
+            if(result == std::codecvt_base::error)
+                throw std::out_of_range("codecvt error");
+
+            if(result == std::codecvt_base::ok && to_next != buffer_out)
+                break;
+
+            // more input is needed, either to complete a sequence or
+            // because what was consumed so far produced no output
+            if(begin == end)
+                throw std::out_of_range("unexpected end");
+            if(in_end == in_limit)
+                throw std::out_of_range("codecvt input too long");
+            *in_end++ = *begin++;
+        }
+
+        // make sure we empty the state: call again with no input for as
+        // long as the facet keeps emitting pending output
+        Output* old_to_next;
+        do
+        {
+            old_to_next = to_next;
+            std::codecvt_base::result result = (codecvt.*f)(
+                state,
+                from_next, from_next, from_next,
+                to_next, out_limit, to_next
+            );
+
+            if(result == std::codecvt_base::error)
+                throw std::out_of_range("codecvt error");
+
+            if(result == std::codecvt_base::partial)
+                break;
+        }
+        while(to_next != old_to_next);
+
+        return std::copy(buffer_out, to_next, out);
+    }
+
+    /** Right-to-left conversion is not supported.
+     * \throws std::runtime_error always */
+    template<typename In, typename Out>
+    Out rtl(In begin, In& end, Out out)
+    {
+        throw std::runtime_error("not implemented");
+    }
+
+private:
+    const Codecvt& codecvt;
+    CvtFunction f;
+};
+
+} // namespace detail
+
+/** Converter running the \c in member of a
+ * <tt>std::codecvt<Output, Input, std::mbstate_t></tt> facet, i.e. going
+ * from the facet's external type (\c Input) to its internal type (\c Output). */
+template<typename Input, typename Output>
+struct codecvt_in_converter
+    : detail::codecvt_converter<
+          Input,
+          Output,
+          std::codecvt<Output, Input, std::mbstate_t>
+      >
+{
+private:
+    typedef std::codecvt<Output, Input, std::mbstate_t> Codecvt;
+    typedef detail::codecvt_converter<Input, Output, Codecvt> base_type;
+
+public:
+    /** \param facet facet to convert with; the base class keeps a reference to it */
+    codecvt_in_converter(const Codecvt& facet)
+        : base_type(facet, &Codecvt::in)
+    {
+    }
+};
+
+/** Converter running the \c out member of a
+ * <tt>std::codecvt<Input, Output, std::mbstate_t></tt> facet, i.e. going
+ * from the facet's internal type (\c Input) to its external type (\c Output). */
+template<typename Input, typename Output>
+struct codecvt_out_converter
+    : detail::codecvt_converter<
+          Input,
+          Output,
+          std::codecvt<Input, Output, std::mbstate_t>
+      >
+{
+private:
+    typedef std::codecvt<Input, Output, std::mbstate_t> Codecvt;
+    typedef detail::codecvt_converter<Input, Output, Codecvt> base_type;
+
+public:
+    /** \param facet facet to convert with; the base class keeps a reference to it */
+    codecvt_out_converter(const Codecvt& facet)
+        : base_type(facet, &Codecvt::out)
+    {
+    }
+};
+
+} // namespace boost
+
+#endif
Copied: sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp (from r64500, /sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp)
==============================================================================
--- /sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -22,7 +22,7 @@
* When writing to a file, \c P1 is applied for segments of data on which \c B1 is true at the beginning and at the end.
* When reading a file, \c P2 is applied for segments of data on which \c B2 is true at the beginning and at the end. */
template<typename InternT, typename B1, typename P1, typename B2, typename P2>
-struct converter_codecvt_facet : std::codecvt<InternT, typename P1::output_type, std::mbstate_t>
+struct converter_codecvt : std::codecvt<InternT, typename P1::output_type, std::mbstate_t>
{
typedef InternT intern_type;
typedef typename P1::output_type extern_type;
@@ -36,7 +36,7 @@
BOOST_CONCEPT_ASSERT((Convertible<InternT, typename P1::input_type>));
BOOST_CONCEPT_ASSERT((Convertible<typename P2::output_type, InternT>));
- explicit converter_codecvt_facet(const B1& b1_ = B1(), const P1& p1_ = P1(), const B2& b2_ = B2(), const P2& p2_ = P2(), std::size_t refs = 0)
+ explicit converter_codecvt(const B1& b1_ = B1(), const P1& p1_ = P1(), const B2& b2_ = B2(), const P2& p2_ = P2(), std::size_t refs = 0)
: std::codecvt<intern_type, extern_type, state_type>(refs), b1(b1_), p1(p1_), b2(b2_), p2(p2_)
{
}
@@ -80,6 +80,9 @@
return std::codecvt_base::ok;
}
+ if(from_next == from_end)
+ return std::codecvt_base::ok;
+
try
{
st.pending_size = p2.ltr(from_next, from_end, st.pending_data) - st.pending_data;
Deleted: sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt_facet.hpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
+++ (empty file)
@@ -1,225 +0,0 @@
-#ifndef BOOST_ITERATOR_CONVERTER_CODECVT_FACET_HPP
-#define BOOST_ITERATOR_CONVERTER_CODECVT_FACET_HPP
-
-#include <locale>
-#include <cstddef>
-
-#include <boost/iterator/converter_concept.hpp>
-#include <boost/iterator/segmenter_concept.hpp>
-#include <boost/iterator/dummy_output_iterator.hpp>
-
-#include <algorithm>
-
-#include <map>
-#include <boost/range/algorithm.hpp>
-#include <boost/range/join.hpp>
-
-namespace boost
-{
-
-/** Builds a codecvt facet from two \c \xmlonly<conceptname>Converter</conceptname>s\endxmlonly
- * and two \c \xmlonly<conceptname>BoundaryChecker</conceptname>s\endxmlonly.
- * When writing to a file, \c P1 is applied for segments of data on which \c B1 is true at the beginning and at the end.
- * When reading a file, \c P2 is applied for segments of data on which \c B2 is true at the beginning and at the end. */
-template<typename InternT, typename B1, typename P1, typename B2, typename P2>
-struct converter_codecvt_facet : std::codecvt<InternT, typename P1::output_type, std::mbstate_t>
-{
- typedef InternT intern_type;
- typedef typename P1::output_type extern_type;
- typedef std::mbstate_t state_type;
-
- BOOST_CONCEPT_ASSERT((BoundaryCheckerConcept<B1>));
- BOOST_CONCEPT_ASSERT((ConverterConcept<P1>));
- BOOST_CONCEPT_ASSERT((BoundaryCheckerConcept<B2>));
- BOOST_CONCEPT_ASSERT((ConverterConcept<P2>));
-
- BOOST_CONCEPT_ASSERT((Convertible<InternT, typename P1::input_type>));
- BOOST_CONCEPT_ASSERT((Convertible<typename P2::output_type, InternT>));
-
- explicit converter_codecvt_facet(const B1& b1_ = B1(), const P1& p1_ = P1(), const B2& b2_ = B2(), const P2& p2_ = P2(), std::size_t refs = 0)
- : std::codecvt<intern_type, extern_type, state_type>(refs), b1(b1_), p1(p1_), b2(b2_), p2(p2_)
- {
- }
-
-private:
- struct state_t
- {
- intern_type pending_data[P2::max_output::value];
- size_t pending_size;
- };
- mutable std::map<state_type*, state_t> states;
-
- mutable B1 b1;
- mutable P1 p1;
-
- mutable B2 b2;
- mutable P2 p2;
-
-protected:
-
- virtual std::codecvt_base::result do_in(
- state_type& state,
- const extern_type* from,
- const extern_type* from_end,
- const extern_type*& from_next,
- intern_type* to,
- intern_type* to_end,
- intern_type*& to_next
- ) const
- {
- state_t& st = states[&state];
-
- from_next = from;
- to_next = to;
-
- if(st.pending_size)
- {
- *to_next++ = st.pending_data[0];
- std::copy(st.pending_data + 1, st.pending_data + st.pending_size, st.pending_data);
- st.pending_size--;
- return std::codecvt_base::ok;
- }
-
- try
- {
- st.pending_size = p2.ltr(from_next, from_end, st.pending_data) - st.pending_data;
- *to_next++ = st.pending_data[0];
- std::copy(st.pending_data + 1, st.pending_data + st.pending_size, st.pending_data);
- st.pending_size--;
- }
- catch(...)
- {
- return std::codecvt_base::partial;
- }
- return std::codecvt_base::ok;
- }
-
- virtual std::codecvt_base::result do_out(
- state_type& state,
- const intern_type* from,
- const intern_type* from_end,
- const intern_type*& from_next,
- extern_type* to,
- extern_type* to_end,
- extern_type*& to_next
- ) const
- {
- typedef const boost::iterator_range<const intern_type*> range_base;
- typedef boost::range_detail::join_iterator<const intern_type*, const intern_type*> iterator;
-
- state_t& st = states[&state];
-
- from_next = from;
- to_next = to;
-
- boost::joined_range<range_base, range_base> input = boost::join(
- range_base(st.pending_data, st.pending_data + st.pending_size),
- range_base(from, from_end)
- );
-
- iterator from2 = input.begin();
- iterator from_next2 = from2;
- iterator from_end2 = input.end();
-
- while(from_next2 != from_end2)
- {
- try
- {
- to_next = p1.ltr(from_next2, from_end2, to_next);
- }
- catch(...)
- {
- size_t written = from_next2 - from2;
- if(written >= st.pending_size)
- {
- from_next += (from_next2 - from2) - st.pending_size;
- st.pending_size = 0;
- }
-
- boost::copy(range_base(from_next, from_end), st.pending_data + st.pending_size);
- st.pending_size += (from_end - from_next);
- from_next = from_end;
- return std::codecvt_base::ok;
- }
- }
-
- size_t written = from_next2 - from2;
- if(written >= st.pending_size)
- {
- from_next += (from_next2 - from2) - st.pending_size;
- st.pending_size = 0;
- }
- return std::codecvt_base::ok;
- }
-
- virtual bool do_always_noconv() const throw()
- {
- return false;
- }
-
- virtual std::codecvt_base::result do_unshift(
- state_type& state,
- extern_type* to,
- extern_type* to_end,
- extern_type*& to_next
- ) const
- {
- state_t& st = states[&state];
-
- to_next = to;
- const intern_type* from = st.pending_data;
- const intern_type* from_next = from;
- const intern_type* from_end = st.pending_data + st.pending_size;
-
- while(from_next != from_end)
- {
- try
- {
- to_next = p1.ltr(from_next, from_end, to_next);
- }
- catch(...)
- {
- return std::codecvt_base::error;
- }
- }
-
- st.pending_size = 0;
- return std::codecvt_base::ok;
- }
-
- virtual int do_encoding() const throw()
- {
- return 0;
- }
-
- virtual int do_length(
- state_type&,
- const extern_type* from,
- const extern_type* from_end,
- std::size_t max_limit
- ) const
- {
- const extern_type* from_next = from;
- while(from_next != from_end && max_limit--)
- {
- try
- {
- p2.ltr(from_next, from_end, dummy_output_iterator());
- }
- catch(...)
- {
- break;
- }
- }
- return from_next - from;
- }
-
- virtual int do_max_length() const throw ()
- {
- return P1::max_output::value;
- }
-};
-
-} // namespace boost
-
-#endif
Added: sandbox/SOC/2009/unicode/boost/unicode/codecvt.hpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2009/unicode/boost/unicode/codecvt.hpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -0,0 +1,42 @@
+#ifndef BOOST_UNICODE_CODECVT_HPP
+#define BOOST_UNICODE_CODECVT_HPP
+
+#include <boost/unicode/utf_codecs.hpp>
+#include <boost/unicode/compose_fwd.hpp>
+#include <boost/iterator/converter_codecvt.hpp>
+
+namespace boost
+{
+
+namespace unicode
+{
+
+/** Codecvt facet that converts between UTF-X in-memory and UTF-8 in file,
+ * 'X' depending on the size of \c wchar_t.
+ * Also normalizes to NFC when reading the data from the file.
+ *
+ * Writing applies \c utf_transcoder<char> to segments delimited by
+ * \c utf_boundary; reading applies the \c multi_converter chain (decode,
+ * normalize, encode to \c wchar_t) to segments delimited by
+ * \c utf_combine_boundary. */
+typedef converter_codecvt<
+ wchar_t,
+ utf_boundary,
+ utf_transcoder<char>,
+ utf_combine_boundary,
+ multi_converter<
+ converted_converter<utf_decoder, normalizer>,
+ utf_encoder<wchar_t>
+ >
+> utf_u8_normalize_codecvt;
+
+/** Codecvt facet that converts between UTF-X in-memory and UTF-8 in file,
+ * 'X' depending on the size of \c wchar_t.
+ *
+ * No normalization step is involved: writing uses \c utf_transcoder<char>
+ * and reading uses \c utf_transcoder<wchar_t>, both on segments delimited
+ * by \c utf_boundary. */
+typedef converter_codecvt<
+ wchar_t,
+ utf_boundary,
+ utf_transcoder<char>,
+ utf_boundary,
+ utf_transcoder<wchar_t>
+> utf_u8_codecvt;
+
+} // namespace unicode
+
+} // namespace boost
+
+#endif
Modified: sandbox/SOC/2009/unicode/boost/unicode/utf.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/utf.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/unicode/utf.hpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -34,6 +34,12 @@
BOOST_ONEMANYCONVERTER_TPL_DEF(BOOST_UNICODE_CAT(boost::unicode, utf_encoder<ValueType>), utf_encode)
BOOST_CONVERTER_TPL_DEF(BOOST_UNICODE_CAT(boost::unicode, utf_transcoder<ValueType>), utf_transcode)
+BOOST_CONVERTER_DEF(boost::unicode::locale_utf_transcoder, locale_utf_transcode)
+BOOST_CONVERTER_DEF(boost::unicode::utf_locale_transcoder, utf_locale_transcode)
+
+BOOST_CONVERTER_DEF(boost::unicode::locale_decoder, locale_decode)
+BOOST_CONVERTER_DEF(boost::unicode::locale_encoder, locale_encode)
+
} // namespace unicode
} // namespace boost
Modified: sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -4,7 +4,13 @@
#include <boost/assert.hpp>
#include <boost/throw_exception.hpp>
#include <boost/mpl/int.hpp>
+#include <boost/type_traits/is_same.hpp>
+#include <boost/mpl/and.hpp>
+#include <boost/mpl/bool.hpp>
+
#include <stdexcept>
+#include <cstdlib>
+
#ifndef BOOST_NO_STD_LOCALE
#include <sstream>
#include <ios>
@@ -13,9 +19,7 @@
#include <boost/unicode/surrogates.hpp>
#include <boost/iterator/convert_iterator.hpp>
-#include <boost/type_traits/is_same.hpp>
-#include <boost/mpl/and.hpp>
-#include <boost/mpl/bool.hpp>
+#include <boost/iterator/codecvt_converter.hpp>
#include <boost/detail/unspecified.hpp>
@@ -600,6 +604,92 @@
* that converts from UTF-32 to ISO-8859-1 alias latin-1. */
typedef boost::detail::unspecified< cast_converter<char> >::type latin1_encoder;
+/** Converter from the narrow character set of a locale to \c wchar_t,
+ * implemented with the <tt>std::codecvt<wchar_t, char, std::mbstate_t></tt>
+ * facet of that locale. */
+struct locale_utf_transcoder
+{
+    typedef char input_type;
+    typedef wchar_t output_type;
+
+    // taken from the converter that ltr/rtl actually use
+    typedef codecvt_in_converter<input_type, output_type>::max_output max_output;
+
+    /** \param loc_ locale providing the facet; by default the global locale
+     *              on Windows and the locale named by \c LANG elsewhere */
+    locale_utf_transcoder(std::locale loc_ = default_locale()) : loc(loc_)
+    {
+    }
+
+private:
+    typedef std::codecvt<output_type, input_type, std::mbstate_t> Codecvt;
+
+    /** Locale used when the caller does not supply one. */
+    static std::locale default_locale()
+    {
+#ifdef BOOST_WINDOWS
+        return std::locale();
+#else
+        // std::locale(const char*) throws on a null pointer, so fall back
+        // to the user-preferred locale "" when LANG is not set
+        const char* lang = std::getenv("LANG");
+        return std::locale(lang ? lang : "");
+#endif
+    }
+
+public:
+    /** Converts the next narrow characters of <tt>[begin, end)</tt>;
+     * see \c codecvt_in_converter::ltr for the exact contract. */
+    template<typename In, typename Out>
+    Out ltr(In& begin, In end, Out out)
+    {
+        return codecvt_in_converter<input_type, output_type>(std::use_facet<Codecvt>(loc)).ltr(begin, end, out);
+    }
+
+    /** Forwards to \c codecvt_in_converter::rtl. */
+    template<typename In, typename Out>
+    Out rtl(In begin, In& end, Out out)
+    {
+        return codecvt_in_converter<input_type, output_type>(std::use_facet<Codecvt>(loc)).rtl(begin, end, out);
+    }
+
+    std::locale loc;
+};
+
+/** Converter from \c wchar_t to the narrow character set of a locale,
+ * implemented with the <tt>std::codecvt<wchar_t, char, std::mbstate_t></tt>
+ * facet of that locale. */
+struct utf_locale_transcoder
+{
+    typedef wchar_t input_type;
+    typedef char output_type;
+
+    typedef codecvt_out_converter<input_type, output_type>::max_output max_output;
+
+    /** \param loc_ locale providing the facet; by default the global locale
+     *              on Windows and the locale named by \c LANG elsewhere */
+    utf_locale_transcoder(std::locale loc_ = default_locale()) : loc(loc_)
+    {
+    }
+
+private:
+    typedef std::codecvt<input_type, output_type, std::mbstate_t> Codecvt;
+
+    /** Locale used when the caller does not supply one. */
+    static std::locale default_locale()
+    {
+#ifdef BOOST_WINDOWS
+        return std::locale();
+#else
+        // std::locale(const char*) throws on a null pointer, so fall back
+        // to the user-preferred locale "" when LANG is not set
+        const char* lang = std::getenv("LANG");
+        return std::locale(lang ? lang : "");
+#endif
+    }
+
+public:
+    /** Converts the next wide characters of <tt>[begin, end)</tt>;
+     * see \c codecvt_out_converter::ltr for the exact contract. */
+    template<typename In, typename Out>
+    Out ltr(In& begin, In end, Out out)
+    {
+        return codecvt_out_converter<input_type, output_type>(std::use_facet<Codecvt>(loc)).ltr(begin, end, out);
+    }
+
+    /** Forwards to \c codecvt_out_converter::rtl. */
+    template<typename In, typename Out>
+    Out rtl(In begin, In& end, Out out)
+    {
+        return codecvt_out_converter<input_type, output_type>(std::use_facet<Codecvt>(loc)).rtl(begin, end, out);
+    }
+
+    std::locale loc;
+};
+
+/** Model of \c \xmlonly<conceptname>Converter</conceptname>\endxmlonly that
+ * converts from the locale narrow character set to UTF-32.
+ * Composed of \c locale_utf_transcoder (\c char to \c wchar_t) followed by
+ * \c utf_decoder. */
+typedef boost::detail::unspecified<
+ multi_converter<
+ locale_utf_transcoder,
+ utf_decoder
+ >
+>::type locale_decoder;
+
+/** Model of \c \xmlonly<conceptname>Converter</conceptname>\endxmlonly that
+ * converts from UTF-32 to the locale narrow character set.
+ * Composed of \c utf_encoder<wchar_t> followed by \c utf_locale_transcoder
+ * (\c wchar_t to \c char). */
+typedef boost::detail::unspecified<
+ multi_converter<
+ utf_encoder<wchar_t>,
+ utf_locale_transcoder
+ >
+>::type locale_encoder;
+
} // namespace unicode
} // namespace boost
Modified: sandbox/SOC/2009/unicode/libs/unicode/build/Jamfile.v2
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/build/Jamfile.v2 (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/build/Jamfile.v2 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -21,4 +21,6 @@
../src/ucd/uni_ucd_interface_impl_data.cpp
:
<link>shared:<define>BOOST_UNICODE_DYN_LINK=1
+ <debug-symbols>off
+ <strip>on
;
Modified: sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2 (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/doc/Jamfile.v2 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -52,7 +52,7 @@
doxygen autodoc1
:
- [ path.glob-tree ../../../boost/iterator : convert*.hpp segment*.hpp any_*.hpp : .svn detail ]
+ [ path.glob-tree ../../../boost/iterator : codecvt*.hpp convert*.hpp segment*.hpp any_*.hpp : .svn detail ]
[ path.glob-tree ../../../boost/range : any_*.hpp : .svn detail ]
:
Modified: sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -244,7 +244,7 @@
* a Unicode character database, which, for each Unicode code point, provides many properties,
* converters for decomposition, composition, and normalization,
* functions to concatenate normalized ranges,
-* segmenters for graphemes.
+* segmenters for graphemes, and in the near future words, sentences and line breaks.
This library defines the concepts of [conceptref Converter] and [conceptref Segmenter], which are mechanisms to arbitrarily convert or segment ranges of data, expressed as pairs of iterators.
The Converter and Segmenters framework allows to perform these either eagerly
@@ -505,11 +505,14 @@
Unfortunately, it appears it is only possible to use this mechanism with codecvt facets that have =char= as external and either
=char= or =wchar_t= as internal, but C++0x may improve the situation.
-To use [classref boost::converter_codecvt_facet], which allows to build a codecvt facet from converters, you will need two [conceptref Converter]s, one for each direction, as well as two [conceptref BoundaryChecker]s.
+To use [classref boost::converter_codecvt], which allows to build a codecvt facet from converters, you will need two [conceptref Converter]s, one for each direction, as well as two [conceptref BoundaryChecker]s.
Indeed, as codecvt facets are passed arbitrary input buffers, there needs to be a way to tell what is the right boundaries to apply the steps on.
An alternative would be to try to apply a step and try again if there was an error due to incomplete data. This is however not sufficient for
converters that are not stable by concatenation.
+You may also build converters out of codecvt facets with [classref boost::codecvt_in_converter] or [classref boost::codecvt_out_converter], or
+directly convert locales to UTF-32 with [classref boost::unicode::locale_decoder] or [classref boost::unicode::locale_encoder].
+
[import ../test/iterator/test_codecvt.cpp]
[test_codecvt]
Modified: sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2 (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/Jamfile.v2 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -29,6 +29,7 @@
[ run unicode/test_compose.cpp ]
[ run unicode/test_graphemes.cpp ]
[ run unicode/test_utf.cpp ]
+ [ run unicode/test_locale.cpp ]
;
test-suite example : ../example//unicode-examples ;
Modified: sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -1,39 +1,18 @@
//[ test_codecvt
/*`
-This test/example builds a codecvt facet that transcodes from
+This test/example shows how to use a codecvt facet that transcodes from
wide chars (UTF-16 or UTF-32) to UTF-8 on the way out, and that
does the opposite on the way in, but normalizes the string as well.
*/
#define BOOST_TEST_MODULE Codecvt
#include <boost/test/included/unit_test.hpp>
-#include <boost/iterator/converter_codecvt_facet.hpp>
-#include <boost/unicode/utf.hpp>
-#include <boost/unicode/compose.hpp>
+#include <boost/unicode/codecvt.hpp>
#include <fstream>
#include <boost/range/algorithm.hpp>
#include <boost/range/as_literal.hpp>
-typedef boost::converter_codecvt_facet<
- wchar_t,
- boost::unicode::utf_boundary,
- boost::unicode::utf_transcoder<char>,
- boost::unicode::utf_combine_boundary,
- boost::multi_converter<
- boost::converted_converter<boost::unicode::utf_decoder, boost::unicode::normalizer>,
- boost::unicode::utf_encoder<wchar_t>
- >
-> utf_u8_normalize_codecvt;
-
-typedef boost::converter_codecvt_facet<
- wchar_t,
- boost::unicode::utf_boundary,
- boost::unicode::utf_transcoder<char>,
- boost::unicode::utf_boundary,
- boost::unicode::utf_transcoder<wchar_t>
-> utf_u8_codecvt;
-
BOOST_AUTO_TEST_CASE( codecvt )
{
@@ -46,7 +25,7 @@
boost::iterator_range<const wchar_t*> data_normalized = data;//boost::as_literal(data_normalized_);
std::locale old_locale;
- std::locale utf8_locale(old_locale, new utf_u8_codecvt());
+ std::locale utf8_locale(old_locale, new boost::unicode::utf_u8_codecvt());
// Set a new global locale
//std::locale::global(utf8_locale);
Added: sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_locale.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_locale.cpp 2010-08-01 17:15:02 EDT (Sun, 01 Aug 2010)
@@ -0,0 +1,59 @@
+#define BOOST_TEST_MODULE Locale
+#include <boost/test/included/unit_test.hpp>
+
+#include <boost/unicode/codecvt.hpp>
+#include <boost/unicode/utf.hpp>
+
+#include <boost/range/as_literal.hpp>
+#include <iostream>
+
+#include "range_test.hpp"
+
+namespace unicode = boost::unicode;
+using boost::char32;
+
+typedef boost::converter_codecvt< // test-local facet: wchar_t in memory, UTF-8 outside
+ wchar_t,
+ boost::unicode::utf_boundary,
+ boost::unicode::utf_transcoder<char>,
+ boost::unicode::utf_boundary,
+ boost::unicode::utf_transcoder<wchar_t>
+> utf_u8_codecvt; // same template arguments as boost::unicode::utf_u8_codecvt
+
+BOOST_AUTO_TEST_CASE( locale_custom ) // locale_decode/locale_encode through a locale carrying the UTF-8 facet
+{
+ std::locale old_locale;
+ std::locale loc(old_locale, new utf_u8_codecvt); // copy of the global locale with the codecvt facet replaced
+
+ char input_utf8_[] = "hello \xc3\xa9 \xf0\xaa\x98\x80 world"; // UTF-8 bytes; the escapes encode U+00E9 and U+2A600
+ boost::iterator_range<const char*> input_utf8 = boost::as_literal(input_utf8_);
+
+ char32 output[] = {'h', 'e', 'l', 'l', 'o', ' ', 0xE9, ' ', 0x2A600, ' ', 'w', 'o', 'r', 'l', 'd'}; // expected code points
+
+ std::cout << "------------ locale_decode custom ----------------\n" << std::endl;
+ CHECK_EQUALS(unicode::adaptors::locale_decode(input_utf8, loc), output); // narrow bytes -> code points using loc
+ std::cout << "------------ locale_encode custom ----------------\n" << std::endl;
+ CHECK_EQUALS(unicode::adaptors::locale_encode(output, unicode::locale_encoder(unicode::utf_encoder<wchar_t>(), unicode::utf_locale_transcoder(loc))), input_utf8); // encoder built explicitly so its transcoder uses loc
+
+}
+
+BOOST_AUTO_TEST_CASE( locale_native ) // same round trip, but with the converters' default locale
+{
+#ifdef BOOST_WINDOWS
+ char input_native_[] = "hello \u00E9 world"; // the Windows variant leaves out U+2A600
+#else
+ char input_native_[] = "hello \u00E9 \U0002A600 world"; // NOTE(review): bytes depend on the compiler's execution character set; presumably expected to match the runtime locale
+#endif
+ boost::iterator_range<const char*> input_native = boost::as_literal(input_native_);
+
+#ifdef BOOST_WINDOWS
+ char32 output_native[] = {'h', 'e', 'l', 'l', 'o', ' ', 0xE9, ' ', 'w', 'o', 'r', 'l', 'd'}; // expected code points, Windows variant
+#else
+ char32 output_native[] = {'h', 'e', 'l', 'l', 'o', ' ', 0xE9, ' ', 0x2A600, ' ', 'w', 'o', 'r', 'l', 'd'}; // expected code points
+#endif
+
+ std::cout << "------------ locale_decode ----------------\n" << std::endl;
+ CHECK_EQUALS(unicode::adaptors::locale_decode(input_native), output_native); // no locale argument: default-constructed converter
+ std::cout << "------------ locale_encode ----------------\n" << std::endl;
+ CHECK_EQUALS(unicode::adaptors::locale_encode(output_native), input_native); // no locale argument: default-constructed converter
+}
Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk