Boost logo

Boost-Commit :

Subject: [Boost-commit] svn:boost r64688 - in sandbox/SOC/2009/unicode: boost/iterator boost/unicode libs/unicode/doc libs/unicode/example libs/unicode/test/iterator libs/unicode/test/unicode
From: loufoque_at_[hidden]
Date: 2010-08-08 20:58:53


Author: mgaunard
Date: 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
New Revision: 64688
URL: http://svn.boost.org/trac/boost/changeset/64688

Log:
base64 example and make codecvt actually use boundary checkers
Added:
   sandbox/SOC/2009/unicode/libs/unicode/example/base64.cpp (contents, props changed)
Text files modified:
   sandbox/SOC/2009/unicode/boost/iterator/codecvt_converter.hpp | 11 +
   sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp | 256 +++++++++++++++++++++++++++------------
   sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp | 29 ++++
   sandbox/SOC/2009/unicode/libs/unicode/doc/autodoc1c.xml | 2
   sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk | 7
   sandbox/SOC/2009/unicode/libs/unicode/example/Jamfile.v2 | 1
   sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp | 49 ++++++-
   sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_locale.cpp | 23 ++-
   8 files changed, 272 insertions(+), 106 deletions(-)

Modified: sandbox/SOC/2009/unicode/boost/iterator/codecvt_converter.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/iterator/codecvt_converter.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/iterator/codecvt_converter.hpp 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -37,6 +37,8 @@
     template<typename In, typename Out>
     Out ltr(In& begin, In end, Out out)
     {
+ In old_begin = begin;
+
         std::mbstate_t state;
         memset(&state, 0, sizeof state);
         
@@ -67,11 +69,9 @@
                 const_cast<Input*>(from_next)[i] = *begin++;
             }
             
- if(to_next - buffer_out)
+ if(to_next - buffer_out || begin == end)
                 break;
-
- if(begin == end)
- throw std::out_of_range("unexpected end");
+
             *const_cast<Input*>(from_next) = *begin++;
         }
         
@@ -94,6 +94,9 @@
         }
         while(to_next != old_to_next);
     
+ // restore begin to the position given by the final 'from_next'
+ std::advance(old_begin, from_next - buffer_in);
+ begin = old_begin;
         return std::copy(buffer_out, to_next, out);
     }
     

Modified: sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/iterator/converter_codecvt.hpp 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -11,12 +11,34 @@
 #include <algorithm>
 
 #include <map>
-#include <boost/range/algorithm.hpp>
 #include <boost/range/join.hpp>
+#include <boost/circular_buffer.hpp>
 
 namespace boost
 {
 
+namespace detail
+{
+ template<typename A, typename B, typename T>
+ struct other;
+
+ template<typename A>
+ struct other<A, A, A>
+ {
+ typedef A type;
+ };
+ template<typename A, typename B>
+ struct other<A, B, A>
+ {
+ typedef B type;
+ };
+ template<typename A, typename B>
+ struct other<A, B, B>
+ {
+ typedef A type;
+ };
+}
+
 /** Builds a codecvt facet from two \c \xmlonly<conceptname>Converter</conceptname>s\endxmlonly
  * and two \c \xmlonly<conceptname>BoundaryChecker</conceptname>s\endxmlonly.
  * When writing to a file, \c P1 is applied for segments of data on which \c B1 is true at the beginning and at the end.
@@ -36,27 +58,132 @@
     BOOST_CONCEPT_ASSERT((Convertible<InternT, typename P1::input_type>));
     BOOST_CONCEPT_ASSERT((Convertible<typename P2::output_type, InternT>));
     
- explicit converter_codecvt(const B1& b1_ = B1(), const P1& p1_ = P1(), const B2& b2_ = B2(), const P2& p2_ = P2(), std::size_t refs = 0)
- : std::codecvt<intern_type, extern_type, state_type>(refs), b1(b1_), p1(p1_), b2(b2_), p2(p2_)
+ typedef typename std::basic_ios<InternT>::pos_type pos_type;
+
+ explicit converter_codecvt(pos_type file_size_ = (pos_type)-1, const B1& b1_ = B1(), const P1& p1_ = P1(), const B2& b2_ = B2(), const P2& p2_ = P2(), std::size_t refs = 0)
+ : std::codecvt<intern_type, extern_type, state_type>(refs), file_size(file_size_), b1(b1_), p1(p1_), b2(b2_), p2(p2_)
     {
     }
     
 private:
+ pos_type file_size;
+
+ template<typename T>
     struct state_t
     {
- intern_type pending_data[P2::max_output::value];
- size_t pending_size;
+ boost::circular_buffer<T> pending_data;
+ pos_type read_size;
+
+ // size of storage is maximum size of input, which is not exposed
+ // by Converters, so we just take an arbitrary max size
+ state_t() : pending_data(64), read_size(0)
+ {
+ }
+ };
+
+ struct state_pair_t
+ {
+ state_t<extern_type> in;
+ state_t<intern_type> out;
+
+#ifdef BOOST_MSVC
+ // MSVC only calls 'in' step-by-step, so that is enough storage
+ intern_type pending_write_data[P2::max_output::value];
+ size_t pending_write_index;
+ size_t pending_write_size;
+
+ state_pair_t() : pending_write_index(0), pending_write_size(0)
+ {
+ }
+#endif
+
+ };
+
+ template<typename T>
+ struct other : detail::other<intern_type, extern_type, T>
+ {
     };
- mutable std::map<state_type*, state_t> states;
+
+ mutable std::map<state_type*, state_pair_t> states;
     
     mutable B1 b1;
     mutable P1 p1;
     
     mutable B2 b2;
     mutable P2 p2;
-
-protected:
 
+ template<typename B, typename P, typename T>
+ std::codecvt_base::result do_(
+ B& b, P& p,
+ state_t<T>& st,
+ const T* from, const T* from_end, const T*& from_next,
+ typename other<T>::type* to, typename other<T>::type* to_end, typename other<T>::type*& to_next
+ ) const
+ { // Shared body of do_in/do_out: converts (pending data + [from, from_end)) one segment at a time, 'b' finding each segment end and 'p.ltr' converting it.
+ typedef const boost::iterator_range<typename circular_buffer<T>::const_iterator> range_circular;
+ typedef const boost::iterator_range<const T*> range_base;
+ typedef boost::range_detail::join_iterator<typename circular_buffer<T>::const_iterator, const T*> iterator;
+ // NOTE(review): 'to_end' is never consulted below, so 'to' is assumed to be large enough for everything 'p.ltr' writes.
+ from_next = from;
+ to_next = to;
+
+ // our real input is the concatenated pending data and the given input
+ boost::joined_range<range_circular, range_base> input = boost::join(
+ range_circular(st.pending_data.begin(), st.pending_data.end()),
+ range_base(from, from_end)
+ );
+
+ iterator from2 = input.begin();
+ iterator from_next2 = from2;
+ iterator from_end2 = input.end();
+
+ // while we have some input
+ while(from_next2 != from_end2)
+ {
+ iterator from_boundary = from_next2;
+ do
+ {
+ ++from_boundary;
+ }
+ while(from_boundary != from_end2 && !b(from_next2, from_end2, from_boundary));
+ // end of input is assumed when the running total reaches 'file_size' or when the caller passed an empty range
+ bool eof = st.read_size + pos_type(from_end2 - from_next2) == file_size
+ || from == from_end;
+ // NOTE(review): 'pending_data' is built with a capacity of 64 in state_t; confirm what happens if an unterminated segment grows beyond that.
+ // boundary not found and not end of file, we append the trailing data to 'pending'
+ if(from_boundary == from_end2 && !eof)
+ {
+ std::copy(from_next, from_end, std::back_inserter(st.pending_data));
+ from_next = from_end;
+ return std::codecvt_base::ok;
+ }
+ // segment length is taken before the call because 'ltr' receives 'from_next2' by reference and may advance it
+ size_t written = from_boundary - from_next2;
+ try
+ {
+ to_next = p.ltr(from_next2, from_boundary, to_next);
+ }
+ catch(...)
+ {
+ return std::codecvt_base::error;
+ }
+
+ // erase the consumed pending data and update 'from_next'
+ st.read_size += written;
+ if(written >= st.pending_data.size())
+ {
+ from_next += written - st.pending_data.size();
+ st.pending_data.clear();
+ }
+ else
+ {
+ st.pending_data.erase_begin(written);
+ }
+ }
+ return std::codecvt_base::ok;
+ }
+
+protected:
     virtual std::codecvt_base::result do_in(
         state_type& state,
         const extern_type* from,
@@ -67,34 +194,43 @@
         intern_type*& to_next
     ) const
     {
- state_t& st = states[&state];
+#if BOOST_MSVC
+ state_pair_t& st = states[&state];
         
- from_next = from;
         to_next = to;
+ from_next = from;
         
- if(st.pending_size)
+ // MSVC only supports getting one result per call, so we use a hack
+ if(st.pending_write_size)
         {
- *to_next++ = st.pending_data[0];
- std::copy(st.pending_data + 1, st.pending_data + st.pending_size, st.pending_data);
- st.pending_size--;
+ *to_next++ = st.pending_write_data[st.pending_write_index];
+ st.pending_write_index++;
+ st.pending_write_size--;
+
+ if(!st.pending_write_size)
+ from_next++;
+
             return std::codecvt_base::ok;
         }
         
- if(from_next == from_end)
- return std::codecvt_base::ok;
-
- try
- {
- st.pending_size = p2.ltr(from_next, from_end, st.pending_data) - st.pending_data;
- *to_next++ = st.pending_data[0];
- std::copy(st.pending_data + 1, st.pending_data + st.pending_size, st.pending_data);
- st.pending_size--;
- }
- catch(...)
+ intern_type* to_next2;
+ std::codecvt_base::result result = do_(b2, p2, states[&state].in, from, from_end, from_next, st.pending_write_data, st.pending_write_data + st.pending_write_size, to_next2);
+ st.pending_write_size = to_next2 - st.pending_write_data;
+
+ if(st.pending_write_size)
         {
- return std::codecvt_base::partial;
+ *to_next++ = st.pending_write_data[0];
+ st.pending_write_index = 1;
+ st.pending_write_size--;
         }
- return std::codecvt_base::ok;
+
+ if(st.pending_write_size)
+ from_next--;
+
+ return result;
+#else
+ return do_(b2, p2, states[&state].in, from, from_end, from_next, to, to_end, to_next);
+#endif
     }
 
     virtual std::codecvt_base::result do_out(
@@ -107,52 +243,7 @@
         extern_type*& to_next
     ) const
     {
- typedef const boost::iterator_range<const intern_type*> range_base;
- typedef boost::range_detail::join_iterator<const intern_type*, const intern_type*> iterator;
-
- state_t& st = states[&state];
-
- from_next = from;
- to_next = to;
-
- boost::joined_range<range_base, range_base> input = boost::join(
- range_base(st.pending_data, st.pending_data + st.pending_size),
- range_base(from, from_end)
- );
-
- iterator from2 = input.begin();
- iterator from_next2 = from2;
- iterator from_end2 = input.end();
-
- while(from_next2 != from_end2)
- {
- try
- {
- to_next = p1.ltr(from_next2, from_end2, to_next);
- }
- catch(...)
- {
- size_t written = from_next2 - from2;
- if(written >= st.pending_size)
- {
- from_next += (from_next2 - from2) - st.pending_size;
- st.pending_size = 0;
- }
-
- boost::copy(range_base(from_next, from_end), st.pending_data + st.pending_size);
- st.pending_size += (from_end - from_next);
- from_next = from_end;
- return std::codecvt_base::ok;
- }
- }
-
- size_t written = from_next2 - from2;
- if(written >= st.pending_size)
- {
- from_next += (from_next2 - from2) - st.pending_size;
- st.pending_size = 0;
- }
- return std::codecvt_base::ok;
+ return do_(b1, p1, states[&state].out, from, from_end, from_next, to, to_end, to_next);
     }
 
     virtual bool do_always_noconv() const throw()
@@ -167,12 +258,15 @@
         extern_type*& to_next
     ) const
     {
- state_t& st = states[&state];
+ typedef typename circular_buffer<intern_type>::const_iterator iterator;
+ state_t<intern_type>& st = states[&state].out;
         
         to_next = to;
- const intern_type* from = st.pending_data;
- const intern_type* from_next = from;
- const intern_type* from_end = st.pending_data + st.pending_size;
+
+ // we output the pending 'out' data
+ iterator from = st.pending_data.begin();
+ iterator from_next = from;
+ iterator from_end = st.pending_data.end();
         
         while(from_next != from_end)
         {
@@ -185,8 +279,7 @@
                 return std::codecvt_base::error;
             }
         }
-
- st.pending_size = 0;
+ st.pending_data.clear();
         return std::codecvt_base::ok;
     }
 
@@ -195,6 +288,7 @@
         return 0;
     }
 
+ // probably needs fixing, but no implementation uses this
     virtual int do_length(
         state_type&,
         const extern_type* from,
@@ -219,7 +313,9 @@
 
     virtual int do_max_length() const throw ()
     {
- return P1::max_output::value;
+ // maximum size of P2's input, which is not exposed
+ // by Converters, so we just take an arbitrary max size
+ return 64;
     }
 };
     

Modified: sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp
==============================================================================
--- sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp (original)
+++ sandbox/SOC/2009/unicode/boost/unicode/utf_codecs.hpp 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -81,6 +81,29 @@
         boost::throw_exception(e);
 }
 
+ template<typename Iterator>
+ bool distance_greater_impl(Iterator begin, Iterator end, typename std::iterator_traits<Iterator>::difference_type min, std::random_access_iterator_tag*)
+ {
+ return (end - begin) >= min;
+ }
+
+ template<typename Iterator> // overload for iterators without random access: walks at most 'min' steps
+ bool distance_greater_impl(Iterator begin, Iterator end, typename std::iterator_traits<Iterator>::difference_type min, std::input_iterator_tag*)
+ { // returns true iff [begin, end) holds at least 'min' elements, i.e. the same '>=' test as the random-access overload
+ for(typename std::iterator_traits<Iterator>::difference_type i=0; i < min; ++begin, ++i)
+ {
+ if(begin == end) // ran out of elements before 'min' of them were counted
+ return false;
+ }
+ return true;
+ }
+
+ template<typename Iterator>
+ bool distance_greater(Iterator begin, Iterator end, typename std::iterator_traits<Iterator>::difference_type min)
+ {
+ return distance_greater_impl(begin, end, min, (typename std::iterator_traits<Iterator>::iterator_category*)0);
+ }
+
 } // namespace detail
 
 /** Model of \c \xmlonly<conceptname>OneManyConverter</conceptname>\endxmlonly
@@ -216,7 +239,8 @@
         BOOST_ASSERT(pos != begin);
         BOOST_ASSERT(pos != end);
         
- return !is_surrogate(*pos) || is_high_surrogate(*pos);
+ return !is_surrogate(*pos)
+ || (is_high_surrogate(*pos) && detail::distance_greater(pos, end, 2));
     }
 };
 
@@ -399,7 +423,8 @@
         BOOST_ASSERT(pos != end);
         
         unsigned char c = *pos;
- return (c & 0x80) == 0 || (c & 0xc0) == 0xc0;
+ return (c & 0x80) == 0
+ || ((c & 0xc0) == 0xc0 && detail::distance_greater(pos, end, detail::utf8_byte_count(c)));
     }
 };
 

Modified: sandbox/SOC/2009/unicode/libs/unicode/doc/autodoc1c.xml
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/doc/autodoc1c.xml (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/doc/autodoc1c.xml 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -1,5 +1,5 @@
 <?xml version="1.0" standalone="yes"?>
-<library-reference xmlns:xi="http://www.w3.org/2001/XInclude" id="iterator_range_reference">
+<library-reference xmlns:xi="http://www.w3.org/2001/XInclude" id="converters_and_segmenters_reference">
 <title>Iterator/Range reference</title>
 
 <xi:include href="concepts/Converter.xml"/>

Modified: sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/doc/users_manual.qbk 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -505,11 +505,16 @@
 Unfortunately, it appears it is only possible to use this mechanism with codecvt facets that have =char= as external and either
 =char= or =wchar_t= as internal, but C++0x may improve the situation.
 
-To use [classref boost::converter_codecvt], which allows to build a codecvt facet from converters, you will need two [conceptref Converter]s, one for each direction, as well as two [conceptref BoundaryChecker]s.
+To use [classref boost::converter_codecvt], which lets you build a codecvt facet from converters, you will need two [conceptref Converter]s, one
+for each direction, as well as two [conceptref BoundaryChecker]s.
 Indeed, as codecvt facets are passed arbitrary input buffers, there needs to be a way to tell what is the right boundaries to apply the steps on.
 An alternative would be to try to apply a step and try again if there was an error due to incomplete data. This is however not sufficient for
 converters that are not stable by concatenation.
 
+Unfortunately, codecvt facets do not provide a way to identify the end of the input in the file-to-memory case -- even though they do in the
+other direction -- and the generic mechanism used to build codecvt facets needs this information.
+As a workaround, you can provide the size of the file to the codecvt facet's constructor.
+
 You may also build converters out of codecvt facets with [classref boost::codecvt_in_converter] or [classref boost::codecvt_out_converter], or
 directly convert locales to UTF-32 with [classref boost::unicode::locale_decoder] or [classref boost::unicode::locale_encoder].
 

Modified: sandbox/SOC/2009/unicode/libs/unicode/example/Jamfile.v2
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/example/Jamfile.v2 (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/example/Jamfile.v2 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -25,4 +25,5 @@
     [ run compose.cpp ]
     [ run search.cpp ]
     [ run source_input.cpp ]
+ [ run base64.cpp ]
 ;

Added: sandbox/SOC/2009/unicode/libs/unicode/example/base64.cpp
==============================================================================
--- (empty file)
+++ sandbox/SOC/2009/unicode/libs/unicode/example/base64.cpp 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -0,0 +1,115 @@
+#include <boost/iterator/convert_iterator.hpp>
+#include <boost/iterator/converter_codecvt.hpp>
+
+#include <boost/range/as_literal.hpp>
+
+#include <fstream>
+
+struct base64_encoder
+{ // Example converter: turns groups of up to 3 input bytes into 4 base64 characters, padding with '='.
+ typedef char input_type;
+ typedef char output_type;
+ // every call to ltr writes exactly 4 characters
+ typedef boost::mpl::int_<4> max_output;
+ // Consumes 1 to 3 bytes from [begin, end), advancing begin; writes 4 characters to out and returns the advanced out. Requires begin != end.
+ template<typename In, typename Out>
+ Out ltr(In& begin, In end, Out out)
+ {
+ const char * lookup_table =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789"
+ "+/";
+ // unsigned storage: with a signed plain char, bytes >= 0x80 would shift to negative lookup_table indices
+ unsigned char in[3];
+
+ in[0] = *begin++;
+ *out++ = lookup_table[in[0] >> 2];
+ if(begin == end)
+ {
+ *out++ = lookup_table[(in[0] & 0x03) << 4];
+ *out++ = '=';
+ *out++ = '=';
+ return out;
+ }
+
+ in[1] = *begin++;
+ *out++ = lookup_table[((in[0] & 0x03) << 4) | (in[1] >> 4)];
+ if(begin == end)
+ {
+ *out++ = lookup_table[(in[1] & 0x0f) << 2];
+ *out++ = '=';
+ return out;
+ }
+
+ in[2] = *begin++;
+ *out++ = lookup_table[((in[1] & 0x0f) << 2) | (in[2] >> 6)];
+ *out++ = lookup_table[in[2] & 0x3f];
+ return out;
+ }
+ // Encodes the last group of [begin, end): moves end back by 1 to 3 bytes (3 when the length is a multiple of 3) and runs ltr on them.
+ template<typename In, typename Out>
+ Out rtl(In begin, In& end, Out out)
+ {
+ size_t to_read = std::distance(begin, end) % 3;
+ if(!to_read)
+ to_read = 3;
+
+ char in[3];
+ for(size_t i=0; i<to_read; i++)
+ in[to_read-i-1] = *--end;
+
+ char* b = in;
+ return ltr(b, in+to_read, out);
+ }
+};
+
+template<std::size_t N>
+struct fixed_boundary
+{
+ typedef char input_type;
+
+ template<typename In>
+ bool operator()(In begin, In end, In pos)
+ {
+ return !(std::distance(begin, pos) % N);
+ }
+};
+
+typedef boost::converter_codecvt<
+ char,
+ fixed_boundary<3>,
+ base64_encoder,
+ fixed_boundary<3>,
+ base64_encoder
+> base64_codecvt;
+
+#define CHECK_EQUAL(a, b) if(a != b) std::unexpected();
+
+int main()
+{
+ char data_in_[] = "fooba";
+ char data_out_[] = "Zm9vYmE=";
+
+ boost::iterator_range<const char*> data_in = boost::as_literal(data_in_);
+ boost::iterator_range<const char*> data_out = boost::as_literal(data_out_);
+
+ std::locale old_loc;
+ std::locale loc(old_loc, new base64_codecvt(boost::size(data_in)));
+ {
+ std::ofstream ofs("test.base64");
+ ofs << data_in;
+ }
+
+ std::ifstream ifs("test.base64");
+ ifs.imbue(loc);
+
+ char c;
+ size_t i = 0;
+ while(ifs.get(c))
+ {
+ CHECK_EQUAL(c, data_out[i]);
+ ++i;
+ }
+ CHECK_EQUAL(i, (size_t)boost::size(data_out));
+}

Modified: sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/iterator/test_codecvt.cpp 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -2,7 +2,8 @@
 /*`
 This test/example shows how to use a codecvt facet that transcodes from
 wide chars (UTF-16 or UTF-32) to UTF-8 on the way out, and that
-does the opposite on the way in, but normalizes the string as well.
+does the opposite on the way in.
+It also demonstrates a variant that normalizes data read from the file.
 */
 #define BOOST_TEST_MODULE Codecvt
 #include <boost/test/included/unit_test.hpp>
@@ -13,19 +14,48 @@
 #include <boost/range/algorithm.hpp>
 #include <boost/range/as_literal.hpp>
 
+// e\u0301 is \u00E9
+// \U0002FA1D is \U0002A600
+const wchar_t data_[] = L"hello e\u0301 \U0002FA1D world";
+boost::iterator_range<const wchar_t*> data = boost::as_literal(data_);
+
+const wchar_t data_normalized_[] = L"hello \u00E9 \U0002A600 world";
+boost::iterator_range<const wchar_t*> data_normalized = boost::as_literal(data_normalized_);
 
 BOOST_AUTO_TEST_CASE( codecvt )
 {
- // e\u0301 is \u00E9
- // \U0002FA1D is \U0002A600
- const wchar_t data_[] = L"hello e\u0301 \U0002FA1D world";
- boost::iterator_range<const wchar_t*> data = boost::as_literal(data_);
-
- const wchar_t data_normalized_[] = L"hello \u00E9 \U0002A600 world";
- boost::iterator_range<const wchar_t*> data_normalized = data;//boost::as_literal(data_normalized_);
+ std::locale old_locale;
+ std::locale utf8_locale(old_locale, new boost::unicode::utf_u8_codecvt(20));
+
+ // Set a new global locale
+ //std::locale::global(utf8_locale);
+
+ // Send the UTF-X data out, converting to UTF-8
+ {
+ std::wofstream ofs("data.ucd");
+ ofs.imbue(utf8_locale);
+ boost::copy(data, std::ostream_iterator<wchar_t, wchar_t>(ofs));
+ }
+
+ // Read the UTF-8 data back in, converting to UTF-X on the way in (this facet does not normalize)
+ {
+ std::wifstream ifs("data.ucd");
+ ifs.imbue(utf8_locale);
+ wchar_t item = 0;
+ size_t i = 0;
+ while (ifs >> std::noskipws >> item)
+ {
+ BOOST_CHECK_EQUAL(data[i], item);
+ i++;
+ }
+ BOOST_CHECK_EQUAL(i, (size_t)boost::size(data));
+ }
+}
 
+BOOST_AUTO_TEST_CASE( codecvt_normalized )
+{
     std::locale old_locale;
- std::locale utf8_locale(old_locale, new boost::unicode::utf_u8_codecvt());
+ std::locale utf8_locale(old_locale, new boost::unicode::utf_u8_normalize_codecvt(20));
 
     // Set a new global locale
     //std::locale::global(utf8_locale);
@@ -51,4 +81,5 @@
         BOOST_CHECK_EQUAL(i, (size_t)boost::size(data_normalized));
     }
 }
+
 //]

Modified: sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_locale.cpp
==============================================================================
--- sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_locale.cpp (original)
+++ sandbox/SOC/2009/unicode/libs/unicode/test/unicode/test_locale.cpp 2010-08-08 20:58:42 EDT (Sun, 08 Aug 2010)
@@ -12,18 +12,10 @@
 namespace unicode = boost::unicode;
 using boost::char32;
 
-typedef boost::converter_codecvt<
- wchar_t,
- boost::unicode::utf_boundary,
- boost::unicode::utf_transcoder<char>,
- boost::unicode::utf_boundary,
- boost::unicode::utf_transcoder<wchar_t>
-> utf_u8_codecvt;
-
 BOOST_AUTO_TEST_CASE( locale_custom )
 {
     std::locale old_locale;
- std::locale loc(old_locale, new utf_u8_codecvt);
+ std::locale loc(old_locale, new unicode::utf_u8_codecvt());
     
     char input_utf8_[] = "hello \xc3\xa9 \xf0\xaa\x98\x80 world";
     boost::iterator_range<const char*> input_utf8 = boost::as_literal(input_utf8_);
@@ -34,7 +26,20 @@
     CHECK_EQUALS(unicode::adaptors::locale_decode(input_utf8, loc), output);
     std::cout << "------------ locale_encode custom ----------------\n" << std::endl;
     CHECK_EQUALS(unicode::adaptors::locale_encode(output, unicode::locale_encoder(unicode::utf_encoder<wchar_t>(), unicode::utf_locale_transcoder(loc))), input_utf8);
+}
+
+BOOST_AUTO_TEST_CASE( locale_custom_normalize )
+{
+ std::locale old_locale;
+ std::locale loc(old_locale, new unicode::utf_u8_normalize_codecvt());
     
+ char input_utf8_[] = "hello e\xcc\x81 \xf0\xaf\xa8\x9d world";
+ boost::iterator_range<const char*> input_utf8 = boost::as_literal(input_utf8_);
+
+ char32 output[] = {'h', 'e', 'l', 'l', 'o', ' ', 0xE9, ' ', 0x2A600, ' ', 'w', 'o', 'r', 'l', 'd'};
+
+ std::cout << "------------ locale_decode custom normalize ----------------\n" << std::endl;
+ CHECK_EQUALS(unicode::adaptors::locale_decode(input_utf8, loc), output);
 }
 
 BOOST_AUTO_TEST_CASE( locale_native )


Boost-Commit list run by bdawes at acm.org, david.abrahams at rcn.com, gregod at cs.rpi.edu, cpdaniel at pacbell.net, john at johnmaddock.co.uk